xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 7b6bb2c608b6fc6714ef38fda02c2dbb91c82665)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best = past/future;
36       current = i;
37     }
38   }
39 
40   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
41   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk]) past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future  += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current = newcurrent;
84     order[i+1] = current;
85   }
86   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr = PetscFree(done);CHKERRQ(ierr);
90   ierr = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 EXTERN_C_BEGIN
96 #undef __FUNCT__
97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
99 {
100   PetscFunctionBegin;
101   *flg = PETSC_TRUE;
102   PetscFunctionReturn(0);
103 }
104 EXTERN_C_END
105 
106 EXTERN_C_BEGIN
107 #undef __FUNCT__
108 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110 {
111   PetscInt           n = A->rmap->n;
112   PetscErrorCode     ierr;
113 
114   PetscFunctionBegin;
115   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
116   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
117   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
119     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
121   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
122     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
123     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
124     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
125     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
127   (*B)->factortype = ftype;
128   PetscFunctionReturn(0);
129 }
130 EXTERN_C_END
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
134 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
135 {
136   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
137   IS                 isicol;
138   PetscErrorCode     ierr;
139   const PetscInt     *r,*ic;
140   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
141   PetscInt           *bi,*bj,*ajtmp;
142   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
143   PetscReal          f;
144   PetscInt           nlnk,*lnk,k,**bi_ptr;
145   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
146   PetscBT            lnkbt;
147 
148   PetscFunctionBegin;
149   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
150   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
151   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
152   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
153 
154   /* get new row pointers */
155   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
156   bi[0] = 0;
157 
158   /* bdiag is location of diagonal in factor */
159   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
160   bdiag[0] = 0;
161 
162   /* linked list for storing column indices of the active row */
163   nlnk = n + 1;
164   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
165 
166   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
167 
168   /* initial FreeSpace size is f*(ai[n]+1) */
169   f = info->fill;
170   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
171   current_space = free_space;
172 
173   for (i=0; i<n; i++) {
174     /* copy previous fill into linked list */
175     nzi = 0;
176     nnz = ai[r[i]+1] - ai[r[i]];
177     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
178     ajtmp = aj + ai[r[i]];
179     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
180     nzi += nlnk;
181 
182     /* add pivot rows into linked list */
183     row = lnk[n];
184     while (row < i) {
185       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
186       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
187       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
188       nzi += nlnk;
189       row  = lnk[row];
190     }
191     bi[i+1] = bi[i] + nzi;
192     im[i]   = nzi;
193 
194     /* mark bdiag */
195     nzbd = 0;
196     nnz  = nzi;
197     k    = lnk[n];
198     while (nnz-- && k < i){
199       nzbd++;
200       k = lnk[k];
201     }
202     bdiag[i] = bi[i] + nzbd;
203 
204     /* if free space is not available, make more free space */
205     if (current_space->local_remaining<nzi) {
206       nnz = (n - i)*nzi; /* estimated and max additional space needed */
207       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
208       reallocs++;
209     }
210 
211     /* copy data into free space, then initialize lnk */
212     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
213     bi_ptr[i] = current_space->array;
214     current_space->array           += nzi;
215     current_space->local_used      += nzi;
216     current_space->local_remaining -= nzi;
217   }
218 #if defined(PETSC_USE_INFO)
219   if (ai[n] != 0) {
220     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
222     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
223     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
224     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
225   } else {
226     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
227   }
228 #endif
229 
230   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
231   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
232 
233   /* destroy list of free space and other temporary array(s) */
234   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
235   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
236   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
237   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
238 
239   /* put together the new matrix */
240   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
241   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
242   b    = (Mat_SeqAIJ*)(B)->data;
243   b->free_a       = PETSC_TRUE;
244   b->free_ij      = PETSC_TRUE;
245   b->singlemalloc = PETSC_FALSE;
246   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
247   b->j          = bj;
248   b->i          = bi;
249   b->diag       = bdiag;
250   b->ilen       = 0;
251   b->imax       = 0;
252   b->row        = isrow;
253   b->col        = iscol;
254   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
255   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
256   b->icol       = isicol;
257   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
258 
259   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
260   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
261   b->maxnz = b->nz = bi[n] ;
262 
263   (B)->factortype            = MAT_FACTOR_LU;
264   (B)->info.factor_mallocs   = reallocs;
265   (B)->info.fill_ratio_given = f;
266 
267   if (ai[n]) {
268     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
269   } else {
270     (B)->info.fill_ratio_needed = 0.0;
271   }
272   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
273   if (a->inode.size) {
274     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
275   }
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
281 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
282 {
283   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
284   IS                 isicol;
285   PetscErrorCode     ierr;
286   const PetscInt     *r,*ic;
287   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
288   PetscInt           *bi,*bj,*ajtmp;
289   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
290   PetscReal          f;
291   PetscInt           nlnk,*lnk,k,**bi_ptr;
292   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
293   PetscBT            lnkbt;
294   PetscBool          olddatastruct=PETSC_FALSE;
295 
296   PetscFunctionBegin;
297   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
298   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
299   if(olddatastruct){
300     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
301     PetscFunctionReturn(0);
302   }
303 
304   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
305   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
306   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
307   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
308 
309   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
310   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
311   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
312   bi[0] = bdiag[0] = 0;
313 
314   /* linked list for storing column indices of the active row */
315   nlnk = n + 1;
316   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
317 
318   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
319 
320   /* initial FreeSpace size is f*(ai[n]+1) */
321   f = info->fill;
322   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
323   current_space = free_space;
324 
325   for (i=0; i<n; i++) {
326     /* copy previous fill into linked list */
327     nzi = 0;
328     nnz = ai[r[i]+1] - ai[r[i]];
329     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
330     ajtmp = aj + ai[r[i]];
331     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
332     nzi += nlnk;
333 
334     /* add pivot rows into linked list */
335     row = lnk[n];
336     while (row < i){
337       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
338       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
339       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
340       nzi  += nlnk;
341       row   = lnk[row];
342     }
343     bi[i+1] = bi[i] + nzi;
344     im[i]   = nzi;
345 
346     /* mark bdiag */
347     nzbd = 0;
348     nnz  = nzi;
349     k    = lnk[n];
350     while (nnz-- && k < i){
351       nzbd++;
352       k = lnk[k];
353     }
354     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
355 
356     /* if free space is not available, make more free space */
357     if (current_space->local_remaining<nzi) {
358       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
359       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
360       reallocs++;
361     }
362 
363     /* copy data into free space, then initialize lnk */
364     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
365     bi_ptr[i] = current_space->array;
366     current_space->array           += nzi;
367     current_space->local_used      += nzi;
368     current_space->local_remaining -= nzi;
369   }
370 
371   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
372   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
373 
374   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
375   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
376   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
377   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
378   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
379 
380   /* put together the new matrix */
381   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
382   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
383   b    = (Mat_SeqAIJ*)(B)->data;
384   b->free_a       = PETSC_TRUE;
385   b->free_ij      = PETSC_TRUE;
386   b->singlemalloc = PETSC_FALSE;
387   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
388   b->j          = bj;
389   b->i          = bi;
390   b->diag       = bdiag;
391   b->ilen       = 0;
392   b->imax       = 0;
393   b->row        = isrow;
394   b->col        = iscol;
395   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
396   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
397   b->icol       = isicol;
398   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
399 
400   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
401   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
402   b->maxnz = b->nz = bdiag[0]+1;
403   B->factortype            = MAT_FACTOR_LU;
404   B->info.factor_mallocs   = reallocs;
405   B->info.fill_ratio_given = f;
406 
407   if (ai[n]) {
408     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
409   } else {
410     B->info.fill_ratio_needed = 0.0;
411   }
412 #if defined(PETSC_USE_INFO)
413   if (ai[n] != 0) {
414     PetscReal af = B->info.fill_ratio_needed;
415     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
416     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
417     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
418     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
419   } else {
420     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
421   }
422 #endif
423   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
424   if (a->inode.size) {
425     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 /*
431     Trouble in factorization, should we dump the original matrix?
432 */
433 #undef __FUNCT__
434 #define __FUNCT__ "MatFactorDumpMatrix"
435 PetscErrorCode MatFactorDumpMatrix(Mat A)
436 {
437   PetscErrorCode ierr;
438   PetscBool      flg = PETSC_FALSE;
439 
440   PetscFunctionBegin;
441   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
442   if (flg) {
443     PetscViewer viewer;
444     char        filename[PETSC_MAX_PATH_LEN];
445 
446     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
447     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
448     ierr = MatView(A,viewer);CHKERRQ(ierr);
449     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
450   }
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
456 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
457 {
458   Mat              C=B;
459   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
460   IS               isrow = b->row,isicol = b->icol;
461   PetscErrorCode   ierr;
462   const PetscInt   *r,*ic,*ics;
463   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
464   PetscInt         i,j,k,nz,nzL,row,*pj;
465   const PetscInt   *ajtmp,*bjtmp;
466   MatScalar        *rtmp,*pc,multiplier,*pv;
467   const  MatScalar *aa=a->a,*v;
468   PetscBool        row_identity,col_identity;
469   FactorShiftCtx   sctx;
470   const PetscInt   *ddiag;
471   PetscReal        rs;
472   MatScalar        d;
473 
474   PetscFunctionBegin;
475   /* MatPivotSetUp(): initialize shift context sctx */
476   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
477 
478   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
479     ddiag          = a->diag;
480     sctx.shift_top = info->zeropivot;
481     for (i=0; i<n; i++) {
482       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
483       d  = (aa)[ddiag[i]];
484       rs = -PetscAbsScalar(d) - PetscRealPart(d);
485       v  = aa+ai[i];
486       nz = ai[i+1] - ai[i];
487       for (j=0; j<nz; j++)
488 	rs += PetscAbsScalar(v[j]);
489       if (rs>sctx.shift_top) sctx.shift_top = rs;
490     }
491     sctx.shift_top   *= 1.1;
492     sctx.nshift_max   = 5;
493     sctx.shift_lo     = 0.;
494     sctx.shift_hi     = 1.;
495   }
496 
497   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
498   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
499   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
500   ics  = ic;
501 
502   do {
503     sctx.newshift = PETSC_FALSE;
504     for (i=0; i<n; i++){
505       /* zero rtmp */
506       /* L part */
507       nz    = bi[i+1] - bi[i];
508       bjtmp = bj + bi[i];
509       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
510 
511       /* U part */
512       nz = bdiag[i]-bdiag[i+1];
513       bjtmp = bj + bdiag[i+1]+1;
514       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
515 
516       /* load in initial (unfactored row) */
517       nz    = ai[r[i]+1] - ai[r[i]];
518       ajtmp = aj + ai[r[i]];
519       v     = aa + ai[r[i]];
520       for (j=0; j<nz; j++) {
521         rtmp[ics[ajtmp[j]]] = v[j];
522       }
523       /* ZeropivotApply() */
524       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
525 
526       /* elimination */
527       bjtmp = bj + bi[i];
528       row   = *bjtmp++;
529       nzL   = bi[i+1] - bi[i];
530       for(k=0; k < nzL;k++) {
531         pc = rtmp + row;
532         if (*pc != 0.0) {
533           pv         = b->a + bdiag[row];
534           multiplier = *pc * (*pv);
535           *pc        = multiplier;
536           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
537 	  pv = b->a + bdiag[row+1]+1;
538 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
539           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
540           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
541         }
542         row = *bjtmp++;
543       }
544 
545       /* finished row so stick it into b->a */
546       rs = 0.0;
547       /* L part */
548       pv   = b->a + bi[i] ;
549       pj   = b->j + bi[i] ;
550       nz   = bi[i+1] - bi[i];
551       for (j=0; j<nz; j++) {
552         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
553       }
554 
555       /* U part */
556       pv = b->a + bdiag[i+1]+1;
557       pj = b->j + bdiag[i+1]+1;
558       nz = bdiag[i] - bdiag[i+1]-1;
559       for (j=0; j<nz; j++) {
560         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
561       }
562 
563       sctx.rs  = rs;
564       sctx.pv  = rtmp[i];
565       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
566       if(sctx.newshift) break; /* break for-loop */
567       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
568 
569       /* Mark diagonal and invert diagonal for simplier triangular solves */
570       pv  = b->a + bdiag[i];
571       *pv = 1.0/rtmp[i];
572 
573     } /* endof for (i=0; i<n; i++){ */
574 
575     /* MatPivotRefine() */
576     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
577       /*
578        * if no shift in this attempt & shifting & started shifting & can refine,
579        * then try lower shift
580        */
581       sctx.shift_hi       = sctx.shift_fraction;
582       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
583       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
584       sctx.newshift       = PETSC_TRUE;
585       sctx.nshift++;
586     }
587   } while (sctx.newshift);
588 
589   ierr = PetscFree(rtmp);CHKERRQ(ierr);
590   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
591   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
592 
593   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
594   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
595   if (row_identity && col_identity) {
596     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
597   } else {
598     C->ops->solve = MatSolve_SeqAIJ;
599   }
600   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
601   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
602   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
603   C->ops->matsolve           = MatMatSolve_SeqAIJ;
604   C->assembled    = PETSC_TRUE;
605   C->preallocated = PETSC_TRUE;
606   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
607 
608   /* MatShiftView(A,info,&sctx) */
609   if (sctx.nshift){
610     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
611       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
612     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
613       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
615       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
616     }
617   }
618   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
624 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
625 {
626   Mat             C=B;
627   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
628   IS              isrow = b->row,isicol = b->icol;
629   PetscErrorCode  ierr;
630   const PetscInt   *r,*ic,*ics;
631   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
632   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
633   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
634   MatScalar       *pv,*rtmp,*pc,multiplier,d;
635   const MatScalar *v,*aa=a->a;
636   PetscReal       rs=0.0;
637   FactorShiftCtx  sctx;
638   const PetscInt  *ddiag;
639   PetscBool       row_identity, col_identity;
640 
641   PetscFunctionBegin;
642   /* MatPivotSetUp(): initialize shift context sctx */
643   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
644 
645   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
646     ddiag          = a->diag;
647     sctx.shift_top = info->zeropivot;
648     for (i=0; i<n; i++) {
649       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
650       d  = (aa)[ddiag[i]];
651       rs = -PetscAbsScalar(d) - PetscRealPart(d);
652       v  = aa+ai[i];
653       nz = ai[i+1] - ai[i];
654       for (j=0; j<nz; j++)
655 	rs += PetscAbsScalar(v[j]);
656       if (rs>sctx.shift_top) sctx.shift_top = rs;
657     }
658     sctx.shift_top   *= 1.1;
659     sctx.nshift_max   = 5;
660     sctx.shift_lo     = 0.;
661     sctx.shift_hi     = 1.;
662   }
663 
664   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
665   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
666   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
667   ics  = ic;
668 
669   do {
670     sctx.newshift = PETSC_FALSE;
671     for (i=0; i<n; i++){
672       nz    = bi[i+1] - bi[i];
673       bjtmp = bj + bi[i];
674       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
675 
676       /* load in initial (unfactored row) */
677       nz    = ai[r[i]+1] - ai[r[i]];
678       ajtmp = aj + ai[r[i]];
679       v     = aa + ai[r[i]];
680       for (j=0; j<nz; j++) {
681         rtmp[ics[ajtmp[j]]] = v[j];
682       }
683       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
684 
685       row = *bjtmp++;
686       while  (row < i) {
687         pc = rtmp + row;
688         if (*pc != 0.0) {
689           pv         = b->a + diag_offset[row];
690           pj         = b->j + diag_offset[row] + 1;
691           multiplier = *pc / *pv++;
692           *pc        = multiplier;
693           nz         = bi[row+1] - diag_offset[row] - 1;
694           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
695           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
696         }
697         row = *bjtmp++;
698       }
699       /* finished row so stick it into b->a */
700       pv   = b->a + bi[i] ;
701       pj   = b->j + bi[i] ;
702       nz   = bi[i+1] - bi[i];
703       diag = diag_offset[i] - bi[i];
704       rs   = 0.0;
705       for (j=0; j<nz; j++) {
706         pv[j] = rtmp[pj[j]];
707         rs   += PetscAbsScalar(pv[j]);
708       }
709       rs   -= PetscAbsScalar(pv[diag]);
710 
711       sctx.rs  = rs;
712       sctx.pv  = pv[diag];
713       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
714       if (sctx.newshift) break;
715       pv[diag] = sctx.pv;
716     }
717 
718     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
719       /*
720        * if no shift in this attempt & shifting & started shifting & can refine,
721        * then try lower shift
722        */
723       sctx.shift_hi       = sctx.shift_fraction;
724       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
725       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
726       sctx.newshift       = PETSC_TRUE;
727       sctx.nshift++;
728     }
729   } while (sctx.newshift);
730 
731   /* invert diagonal entries for simplier triangular solves */
732   for (i=0; i<n; i++) {
733     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
734   }
735   ierr = PetscFree(rtmp);CHKERRQ(ierr);
736   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
737   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
738 
739   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
740   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
741   if (row_identity && col_identity) {
742     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
743   } else {
744     C->ops->solve   = MatSolve_SeqAIJ_inplace;
745   }
746   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
747   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
748   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
749   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
750   C->assembled    = PETSC_TRUE;
751   C->preallocated = PETSC_TRUE;
752   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
753   if (sctx.nshift){
754      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
755       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
756     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
757       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
758     }
759   }
760   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
761   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
762   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 /*
767    This routine implements inplace ILU(0) with row or/and column permutations.
768    Input:
769      A - original matrix
770    Output;
771      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
772          a->j (col index) is permuted by the inverse of colperm, then sorted
773          a->a reordered accordingly with a->j
774          a->diag (ptr to diagonal elements) is updated.
775 */
776 #undef __FUNCT__
777 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
778 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
779 {
780   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
781   IS             isrow = a->row,isicol = a->icol;
782   PetscErrorCode ierr;
783   const PetscInt *r,*ic,*ics;
784   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
785   PetscInt       *ajtmp,nz,row;
786   PetscInt       *diag = a->diag,nbdiag,*pj;
787   PetscScalar    *rtmp,*pc,multiplier,d;
788   MatScalar      *pv,*v;
789   PetscReal      rs;
790   FactorShiftCtx sctx;
791   const  MatScalar *aa=a->a,*vtmp;
792 
793   PetscFunctionBegin;
794   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
795 
796   /* MatPivotSetUp(): initialize shift context sctx */
797   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
798 
799   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
800     const PetscInt   *ddiag = a->diag;
801     sctx.shift_top = info->zeropivot;
802     for (i=0; i<n; i++) {
803       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
804       d  = (aa)[ddiag[i]];
805       rs = -PetscAbsScalar(d) - PetscRealPart(d);
806       vtmp  = aa+ai[i];
807       nz = ai[i+1] - ai[i];
808       for (j=0; j<nz; j++)
809 	rs += PetscAbsScalar(vtmp[j]);
810       if (rs>sctx.shift_top) sctx.shift_top = rs;
811     }
812     sctx.shift_top   *= 1.1;
813     sctx.nshift_max   = 5;
814     sctx.shift_lo     = 0.;
815     sctx.shift_hi     = 1.;
816   }
817 
818   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
819   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
820   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
821   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
822   ics = ic;
823 
824 #if defined(MV)
825   sctx.shift_top      = 0.;
826   sctx.nshift_max     = 0;
827   sctx.shift_lo       = 0.;
828   sctx.shift_hi       = 0.;
829   sctx.shift_fraction = 0.;
830 
831   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
832     sctx.shift_top = 0.;
833     for (i=0; i<n; i++) {
834       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
835       d  = (a->a)[diag[i]];
836       rs = -PetscAbsScalar(d) - PetscRealPart(d);
837       v  = a->a+ai[i];
838       nz = ai[i+1] - ai[i];
839       for (j=0; j<nz; j++)
840 	rs += PetscAbsScalar(v[j]);
841       if (rs>sctx.shift_top) sctx.shift_top = rs;
842     }
843     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
844     sctx.shift_top    *= 1.1;
845     sctx.nshift_max   = 5;
846     sctx.shift_lo     = 0.;
847     sctx.shift_hi     = 1.;
848   }
849 
850   sctx.shift_amount = 0.;
851   sctx.nshift       = 0;
852 #endif
853 
854   do {
855     sctx.newshift = PETSC_FALSE;
856     for (i=0; i<n; i++){
857       /* load in initial unfactored row */
858       nz    = ai[r[i]+1] - ai[r[i]];
859       ajtmp = aj + ai[r[i]];
860       v     = a->a + ai[r[i]];
861       /* sort permuted ajtmp and values v accordingly */
862       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
863       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
864 
865       diag[r[i]] = ai[r[i]];
866       for (j=0; j<nz; j++) {
867         rtmp[ajtmp[j]] = v[j];
868         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
869       }
870       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
871 
872       row = *ajtmp++;
873       while  (row < i) {
874         pc = rtmp + row;
875         if (*pc != 0.0) {
876           pv         = a->a + diag[r[row]];
877           pj         = aj + diag[r[row]] + 1;
878 
879           multiplier = *pc / *pv++;
880           *pc        = multiplier;
881           nz         = ai[r[row]+1] - diag[r[row]] - 1;
882           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
883           ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
884         }
885         row = *ajtmp++;
886       }
887       /* finished row so overwrite it onto a->a */
888       pv   = a->a + ai[r[i]] ;
889       pj   = aj + ai[r[i]] ;
890       nz   = ai[r[i]+1] - ai[r[i]];
891       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
892 
893       rs   = 0.0;
894       for (j=0; j<nz; j++) {
895         pv[j] = rtmp[pj[j]];
896         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
897       }
898 
899       sctx.rs  = rs;
900       sctx.pv  = pv[nbdiag];
901       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
902       if (sctx.newshift) break;
903       pv[nbdiag] = sctx.pv;
904     }
905 
906     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
907       /*
908        * if no shift in this attempt & shifting & started shifting & can refine,
909        * then try lower shift
910        */
911       sctx.shift_hi        = sctx.shift_fraction;
912       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
913       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
914       sctx.newshift         = PETSC_TRUE;
915       sctx.nshift++;
916     }
917   } while (sctx.newshift);
918 
919   /* invert diagonal entries for simplier triangular solves */
920   for (i=0; i<n; i++) {
921     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
922   }
923 
924   ierr = PetscFree(rtmp);CHKERRQ(ierr);
925   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
926   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
927   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
928   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
929   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
930   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
931   A->assembled = PETSC_TRUE;
932   A->preallocated = PETSC_TRUE;
933   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
934   if (sctx.nshift){
935     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
936       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
937     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
938       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
939     }
940   }
941   PetscFunctionReturn(0);
942 }
943 
944 /* ----------------------------------------------------------- */
945 #undef __FUNCT__
946 #define __FUNCT__ "MatLUFactor_SeqAIJ"
947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
948 {
949   PetscErrorCode ierr;
950   Mat            C;
951 
952   PetscFunctionBegin;
953   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
954   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
955   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
956   A->ops->solve            = C->ops->solve;
957   A->ops->solvetranspose   = C->ops->solvetranspose;
958   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
959   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 /* ----------------------------------------------------------- */
963 
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
967 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
968 {
969   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
970   IS                iscol = a->col,isrow = a->row;
971   PetscErrorCode    ierr;
972   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
973   PetscInt          nz;
974   const PetscInt    *rout,*cout,*r,*c;
975   PetscScalar       *x,*tmp,*tmps,sum;
976   const PetscScalar *b;
977   const MatScalar   *aa = a->a,*v;
978 
979   PetscFunctionBegin;
980   if (!n) PetscFunctionReturn(0);
981 
982   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
983   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
984   tmp  = a->solve_work;
985 
986   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
987   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
988 
989   /* forward solve the lower triangular */
990   tmp[0] = b[*r++];
991   tmps   = tmp;
992   for (i=1; i<n; i++) {
993     v   = aa + ai[i] ;
994     vi  = aj + ai[i] ;
995     nz  = a->diag[i] - ai[i];
996     sum = b[*r++];
997     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
998     tmp[i] = sum;
999   }
1000 
1001   /* backward solve the upper triangular */
1002   for (i=n-1; i>=0; i--){
1003     v   = aa + a->diag[i] + 1;
1004     vi  = aj + a->diag[i] + 1;
1005     nz  = ai[i+1] - a->diag[i] - 1;
1006     sum = tmp[i];
1007     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1008     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1009   }
1010 
1011   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1012   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1013   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1014   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1015   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1021 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1022 {
1023   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1024   IS              iscol = a->col,isrow = a->row;
1025   PetscErrorCode  ierr;
1026   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1027   PetscInt        nz,neq;
1028   const PetscInt  *rout,*cout,*r,*c;
1029   PetscScalar     *x,*b,*tmp,*tmps,sum;
1030   const MatScalar *aa = a->a,*v;
1031   PetscBool       bisdense,xisdense;
1032 
1033   PetscFunctionBegin;
1034   if (!n) PetscFunctionReturn(0);
1035 
1036   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1037   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1038   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1039   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1040 
1041   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1042   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1043 
1044   tmp  = a->solve_work;
1045   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1046   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1047 
1048   for (neq=0; neq<B->cmap->n; neq++){
1049     /* forward solve the lower triangular */
1050     tmp[0] = b[r[0]];
1051     tmps   = tmp;
1052     for (i=1; i<n; i++) {
1053       v   = aa + ai[i] ;
1054       vi  = aj + ai[i] ;
1055       nz  = a->diag[i] - ai[i];
1056       sum = b[r[i]];
1057       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1058       tmp[i] = sum;
1059     }
1060     /* backward solve the upper triangular */
1061     for (i=n-1; i>=0; i--){
1062       v   = aa + a->diag[i] + 1;
1063       vi  = aj + a->diag[i] + 1;
1064       nz  = ai[i+1] - a->diag[i] - 1;
1065       sum = tmp[i];
1066       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1067       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1068     }
1069 
1070     b += n;
1071     x += n;
1072   }
1073   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1074   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1075   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1076   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1077   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1083 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1084 {
1085   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1086   IS              iscol = a->col,isrow = a->row;
1087   PetscErrorCode  ierr;
1088   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1089   PetscInt        nz,neq;
1090   const PetscInt  *rout,*cout,*r,*c;
1091   PetscScalar     *x,*b,*tmp,sum;
1092   const MatScalar *aa = a->a,*v;
1093   PetscBool       bisdense,xisdense;
1094 
1095   PetscFunctionBegin;
1096   if (!n) PetscFunctionReturn(0);
1097 
1098   ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1099   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1100   ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1101   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1102 
1103   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
1104   ierr = MatGetArray(X,&x);CHKERRQ(ierr);
1105 
1106   tmp  = a->solve_work;
1107   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1108   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1109 
1110   for (neq=0; neq<B->cmap->n; neq++){
1111     /* forward solve the lower triangular */
1112     tmp[0] = b[r[0]];
1113     v      = aa;
1114     vi     = aj;
1115     for (i=1; i<n; i++) {
1116       nz  = ai[i+1] - ai[i];
1117       sum = b[r[i]];
1118       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1119       tmp[i] = sum;
1120       v += nz; vi += nz;
1121     }
1122 
1123     /* backward solve the upper triangular */
1124     for (i=n-1; i>=0; i--){
1125       v   = aa + adiag[i+1]+1;
1126       vi  = aj + adiag[i+1]+1;
1127       nz  = adiag[i]-adiag[i+1]-1;
1128       sum = tmp[i];
1129       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1130       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1131     }
1132 
1133     b += n;
1134     x += n;
1135   }
1136   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1137   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1138   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1139   ierr = MatRestoreArray(X,&x);CHKERRQ(ierr);
1140   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1146 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1147 {
1148   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1149   IS              iscol = a->col,isrow = a->row;
1150   PetscErrorCode  ierr;
1151   const PetscInt  *r,*c,*rout,*cout;
1152   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1153   PetscInt        nz,row;
1154   PetscScalar     *x,*b,*tmp,*tmps,sum;
1155   const MatScalar *aa = a->a,*v;
1156 
1157   PetscFunctionBegin;
1158   if (!n) PetscFunctionReturn(0);
1159 
1160   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1162   tmp  = a->solve_work;
1163 
1164   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1165   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1166 
1167   /* forward solve the lower triangular */
1168   tmp[0] = b[*r++];
1169   tmps   = tmp;
1170   for (row=1; row<n; row++) {
1171     i   = rout[row]; /* permuted row */
1172     v   = aa + ai[i] ;
1173     vi  = aj + ai[i] ;
1174     nz  = a->diag[i] - ai[i];
1175     sum = b[*r++];
1176     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1177     tmp[row] = sum;
1178   }
1179 
1180   /* backward solve the upper triangular */
1181   for (row=n-1; row>=0; row--){
1182     i   = rout[row]; /* permuted row */
1183     v   = aa + a->diag[i] + 1;
1184     vi  = aj + a->diag[i] + 1;
1185     nz  = ai[i+1] - a->diag[i] - 1;
1186     sum = tmp[row];
1187     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1188     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1189   }
1190 
1191   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1192   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1193   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1194   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1195   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 /* ----------------------------------------------------------- */
1200 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1203 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1204 {
1205   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1206   PetscErrorCode    ierr;
1207   PetscInt          n = A->rmap->n;
1208   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1209   PetscScalar       *x;
1210   const PetscScalar *b;
1211   const MatScalar   *aa = a->a;
1212 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1213   PetscInt          adiag_i,i,nz,ai_i;
1214   const PetscInt    *vi;
1215   const MatScalar   *v;
1216   PetscScalar       sum;
1217 #endif
1218 
1219   PetscFunctionBegin;
1220   if (!n) PetscFunctionReturn(0);
1221 
1222   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1223   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1224 
1225 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1226   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1227 #else
1228   /* forward solve the lower triangular */
1229   x[0] = b[0];
1230   for (i=1; i<n; i++) {
1231     ai_i = ai[i];
1232     v    = aa + ai_i;
1233     vi   = aj + ai_i;
1234     nz   = adiag[i] - ai_i;
1235     sum  = b[i];
1236     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1237     x[i] = sum;
1238   }
1239 
1240   /* backward solve the upper triangular */
1241   for (i=n-1; i>=0; i--){
1242     adiag_i = adiag[i];
1243     v       = aa + adiag_i + 1;
1244     vi      = aj + adiag_i + 1;
1245     nz      = ai[i+1] - adiag_i - 1;
1246     sum     = x[i];
1247     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1248     x[i]    = sum*aa[adiag_i];
1249   }
1250 #endif
1251   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1252   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1253   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1259 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1260 {
1261   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1262   IS                iscol = a->col,isrow = a->row;
1263   PetscErrorCode    ierr;
1264   PetscInt          i, n = A->rmap->n,j;
1265   PetscInt          nz;
1266   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1267   PetscScalar       *x,*tmp,sum;
1268   const PetscScalar *b;
1269   const MatScalar   *aa = a->a,*v;
1270 
1271   PetscFunctionBegin;
1272   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1273 
1274   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1275   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1276   tmp  = a->solve_work;
1277 
1278   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1279   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1280 
1281   /* forward solve the lower triangular */
1282   tmp[0] = b[*r++];
1283   for (i=1; i<n; i++) {
1284     v   = aa + ai[i] ;
1285     vi  = aj + ai[i] ;
1286     nz  = a->diag[i] - ai[i];
1287     sum = b[*r++];
1288     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1289     tmp[i] = sum;
1290   }
1291 
1292   /* backward solve the upper triangular */
1293   for (i=n-1; i>=0; i--){
1294     v   = aa + a->diag[i] + 1;
1295     vi  = aj + a->diag[i] + 1;
1296     nz  = ai[i+1] - a->diag[i] - 1;
1297     sum = tmp[i];
1298     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1299     tmp[i] = sum*aa[a->diag[i]];
1300     x[*c--] += tmp[i];
1301   }
1302 
1303   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1304   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1305   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1306   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1307   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1308 
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1314 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1315 {
1316   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1317   IS                iscol = a->col,isrow = a->row;
1318   PetscErrorCode    ierr;
1319   PetscInt          i, n = A->rmap->n,j;
1320   PetscInt          nz;
1321   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1322   PetscScalar       *x,*tmp,sum;
1323   const PetscScalar *b;
1324   const MatScalar   *aa = a->a,*v;
1325 
1326   PetscFunctionBegin;
1327   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1328 
1329   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1330   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1331   tmp  = a->solve_work;
1332 
1333   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1334   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1335 
1336   /* forward solve the lower triangular */
1337   tmp[0] = b[r[0]];
1338   v      = aa;
1339   vi     = aj;
1340   for (i=1; i<n; i++) {
1341     nz  = ai[i+1] - ai[i];
1342     sum = b[r[i]];
1343     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1344     tmp[i] = sum;
1345     v += nz; vi += nz;
1346   }
1347 
1348   /* backward solve the upper triangular */
1349   v  = aa + adiag[n-1];
1350   vi = aj + adiag[n-1];
1351   for (i=n-1; i>=0; i--){
1352     nz  = adiag[i] - adiag[i+1] - 1;
1353     sum = tmp[i];
1354     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1355     tmp[i] = sum*v[nz];
1356     x[c[i]] += tmp[i];
1357     v += nz+1; vi += nz+1;
1358   }
1359 
1360   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1361   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1362   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1363   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1364   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1365 
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1371 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1372 {
1373   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1374   IS                iscol = a->col,isrow = a->row;
1375   PetscErrorCode    ierr;
1376   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1377   PetscInt          i,n = A->rmap->n,j;
1378   PetscInt          nz;
1379   PetscScalar       *x,*tmp,s1;
1380   const MatScalar   *aa = a->a,*v;
1381   const PetscScalar *b;
1382 
1383   PetscFunctionBegin;
1384   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1385   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1386   tmp  = a->solve_work;
1387 
1388   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1389   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1390 
1391   /* copy the b into temp work space according to permutation */
1392   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1393 
1394   /* forward solve the U^T */
1395   for (i=0; i<n; i++) {
1396     v   = aa + diag[i] ;
1397     vi  = aj + diag[i] + 1;
1398     nz  = ai[i+1] - diag[i] - 1;
1399     s1  = tmp[i];
1400     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1401     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1402     tmp[i] = s1;
1403   }
1404 
1405   /* backward solve the L^T */
1406   for (i=n-1; i>=0; i--){
1407     v   = aa + diag[i] - 1 ;
1408     vi  = aj + diag[i] - 1 ;
1409     nz  = diag[i] - ai[i];
1410     s1  = tmp[i];
1411     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1412   }
1413 
1414   /* copy tmp into x according to permutation */
1415   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1416 
1417   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1418   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1419   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1420   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1421 
1422   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1428 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1429 {
1430   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1431   IS                iscol = a->col,isrow = a->row;
1432   PetscErrorCode    ierr;
1433   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1434   PetscInt          i,n = A->rmap->n,j;
1435   PetscInt          nz;
1436   PetscScalar       *x,*tmp,s1;
1437   const MatScalar   *aa = a->a,*v;
1438   const PetscScalar *b;
1439 
1440   PetscFunctionBegin;
1441   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1442   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1443   tmp  = a->solve_work;
1444 
1445   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1446   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1447 
1448   /* copy the b into temp work space according to permutation */
1449   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1450 
1451   /* forward solve the U^T */
1452   for (i=0; i<n; i++) {
1453     v   = aa + adiag[i+1] + 1;
1454     vi  = aj + adiag[i+1] + 1;
1455     nz  = adiag[i] - adiag[i+1] - 1;
1456     s1  = tmp[i];
1457     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1458     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1459     tmp[i] = s1;
1460   }
1461 
1462   /* backward solve the L^T */
1463   for (i=n-1; i>=0; i--){
1464     v   = aa + ai[i];
1465     vi  = aj + ai[i];
1466     nz  = ai[i+1] - ai[i];
1467     s1  = tmp[i];
1468     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1469   }
1470 
1471   /* copy tmp into x according to permutation */
1472   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1473 
1474   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1475   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1476   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1477   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1478 
1479   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1485 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1486 {
1487   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1488   IS                iscol = a->col,isrow = a->row;
1489   PetscErrorCode    ierr;
1490   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1491   PetscInt          i,n = A->rmap->n,j;
1492   PetscInt          nz;
1493   PetscScalar       *x,*tmp,s1;
1494   const MatScalar   *aa = a->a,*v;
1495   const PetscScalar *b;
1496 
1497   PetscFunctionBegin;
1498   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1499   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1500   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1501   tmp  = a->solve_work;
1502 
1503   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1504   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1505 
1506   /* copy the b into temp work space according to permutation */
1507   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1508 
1509   /* forward solve the U^T */
1510   for (i=0; i<n; i++) {
1511     v   = aa + diag[i] ;
1512     vi  = aj + diag[i] + 1;
1513     nz  = ai[i+1] - diag[i] - 1;
1514     s1  = tmp[i];
1515     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1516     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1517     tmp[i] = s1;
1518   }
1519 
1520   /* backward solve the L^T */
1521   for (i=n-1; i>=0; i--){
1522     v   = aa + diag[i] - 1 ;
1523     vi  = aj + diag[i] - 1 ;
1524     nz  = diag[i] - ai[i];
1525     s1  = tmp[i];
1526     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1527   }
1528 
1529   /* copy tmp into x according to permutation */
1530   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1531 
1532   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1533   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1534   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1535   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1536 
1537   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1543 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1544 {
1545   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1546   IS                iscol = a->col,isrow = a->row;
1547   PetscErrorCode    ierr;
1548   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1549   PetscInt          i,n = A->rmap->n,j;
1550   PetscInt          nz;
1551   PetscScalar       *x,*tmp,s1;
1552   const MatScalar   *aa = a->a,*v;
1553   const PetscScalar *b;
1554 
1555   PetscFunctionBegin;
1556   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1557   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1558   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1559   tmp  = a->solve_work;
1560 
1561   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1562   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1563 
1564   /* copy the b into temp work space according to permutation */
1565   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1566 
1567   /* forward solve the U^T */
1568   for (i=0; i<n; i++) {
1569     v   = aa + adiag[i+1] + 1;
1570     vi  = aj + adiag[i+1] + 1;
1571     nz  = adiag[i] - adiag[i+1] - 1;
1572     s1  = tmp[i];
1573     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1574     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1575     tmp[i] = s1;
1576   }
1577 
1578 
1579   /* backward solve the L^T */
1580   for (i=n-1; i>=0; i--){
1581     v   = aa + ai[i] ;
1582     vi  = aj + ai[i];
1583     nz  = ai[i+1] - ai[i];
1584     s1  = tmp[i];
1585     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1586   }
1587 
1588   /* copy tmp into x according to permutation */
1589   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1590 
1591   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1592   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1593   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1594   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1595 
1596   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 /* ----------------------------------------------------------------*/
1601 
1602 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1603 
1604 /*
1605    ilu() under revised new data structure.
1606    Factored arrays bj and ba are stored as
1607      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1608 
1609    bi=fact->i is an array of size n+1, in which
1610    bi+
1611      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1612      bi[n]:  points to L(n-1,n-1)+1
1613 
1614   bdiag=fact->diag is an array of size n+1,in which
1615      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1616      bdiag[n]: points to entry of U(n-1,0)-1
1617 
1618    U(i,:) contains bdiag[i] as its last entry, i.e.,
1619     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1620 */
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1623 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1624 {
1625 
1626   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1627   PetscErrorCode     ierr;
1628   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1629   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1630   PetscBool          missing;
1631   IS                 isicol;
1632 
1633   PetscFunctionBegin;
1634   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1635   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1636   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1637   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1638 
1639   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1640   b    = (Mat_SeqAIJ*)(fact)->data;
1641 
1642   /* allocate matrix arrays for new data structure */
1643   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1644   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1645   b->singlemalloc = PETSC_TRUE;
1646   if (!b->diag){
1647     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1648     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1649   }
1650   bdiag = b->diag;
1651 
1652   if (n > 0) {
1653     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1654   }
1655 
1656   /* set bi and bj with new data structure */
1657   bi = b->i;
1658   bj = b->j;
1659 
1660   /* L part */
1661   bi[0] = 0;
1662   for (i=0; i<n; i++){
1663     nz = adiag[i] - ai[i];
1664     bi[i+1] = bi[i] + nz;
1665     aj = a->j + ai[i];
1666     for (j=0; j<nz; j++){
1667       /*   *bj = aj[j]; bj++; */
1668       bj[k++] = aj[j];
1669     }
1670   }
1671 
1672   /* U part */
1673   bdiag[n] = bi[n]-1;
1674   for (i=n-1; i>=0; i--){
1675     nz = ai[i+1] - adiag[i] - 1;
1676     aj = a->j + adiag[i] + 1;
1677     for (j=0; j<nz; j++){
1678       /*      *bj = aj[j]; bj++; */
1679       bj[k++] = aj[j];
1680     }
1681     /* diag[i] */
1682     /*    *bj = i; bj++; */
1683     bj[k++] = i;
1684     bdiag[i] = bdiag[i+1] + nz + 1;
1685   }
1686 
1687   fact->factortype             = MAT_FACTOR_ILU;
1688   fact->info.factor_mallocs    = 0;
1689   fact->info.fill_ratio_given  = info->fill;
1690   fact->info.fill_ratio_needed = 1.0;
1691   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1692 
1693   b       = (Mat_SeqAIJ*)(fact)->data;
1694   b->row  = isrow;
1695   b->col  = iscol;
1696   b->icol = isicol;
1697   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1698   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1699   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1705 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1706 {
1707   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1708   IS                 isicol;
1709   PetscErrorCode     ierr;
1710   const PetscInt     *r,*ic;
1711   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1712   PetscInt           *bi,*cols,nnz,*cols_lvl;
1713   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1714   PetscInt           i,levels,diagonal_fill;
1715   PetscBool          col_identity,row_identity;
1716   PetscReal          f;
1717   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1718   PetscBT            lnkbt;
1719   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1720   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1721   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1722 
1723   PetscFunctionBegin;
1724   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1725   /*
1726   PetscBool          olddatastruct=PETSC_FALSE;
1727   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1728   if(olddatastruct){
1729     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1730     PetscFunctionReturn(0);
1731   }
1732   */
1733 
1734   levels = (PetscInt)info->levels;
1735   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1736   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1737 
1738   if (!levels && row_identity && col_identity) {
1739     /* special case: ilu(0) with natural ordering */
1740     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1741     if (a->inode.size) {
1742       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1743     }
1744     PetscFunctionReturn(0);
1745   }
1746 
1747   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1748   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1749   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1750   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1751 
1752   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1753   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1754   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1755   bi[0] = bdiag[0] = 0;
1756 
1757   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1758 
1759   /* create a linked list for storing column indices of the active row */
1760   nlnk = n + 1;
1761   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1762 
1763   /* initial FreeSpace size is f*(ai[n]+1) */
1764   f             = info->fill;
1765   diagonal_fill = (PetscInt)info->diagonal_fill;
1766   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1767   current_space = free_space;
1768   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1769   current_space_lvl = free_space_lvl;
1770 
1771   for (i=0; i<n; i++) {
1772     nzi = 0;
1773     /* copy current row into linked list */
1774     nnz  = ai[r[i]+1] - ai[r[i]];
1775     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1776     cols = aj + ai[r[i]];
1777     lnk[i] = -1; /* marker to indicate if diagonal exists */
1778     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1779     nzi += nlnk;
1780 
1781     /* make sure diagonal entry is included */
1782     if (diagonal_fill && lnk[i] == -1) {
1783       fm = n;
1784       while (lnk[fm] < i) fm = lnk[fm];
1785       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1786       lnk[fm]    = i;
1787       lnk_lvl[i] = 0;
1788       nzi++; dcount++;
1789     }
1790 
1791     /* add pivot rows into the active row */
1792     nzbd = 0;
1793     prow = lnk[n];
1794     while (prow < i) {
1795       nnz      = bdiag[prow];
1796       cols     = bj_ptr[prow] + nnz + 1;
1797       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1798       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1799       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1800       nzi += nlnk;
1801       prow = lnk[prow];
1802       nzbd++;
1803     }
1804     bdiag[i] = nzbd;
1805     bi[i+1]  = bi[i] + nzi;
1806 
1807     /* if free space is not available, make more free space */
1808     if (current_space->local_remaining<nzi) {
1809       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1810       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1811       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1812       reallocs++;
1813     }
1814 
1815     /* copy data into free_space and free_space_lvl, then initialize lnk */
1816     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1817     bj_ptr[i]    = current_space->array;
1818     bjlvl_ptr[i] = current_space_lvl->array;
1819 
1820     /* make sure the active row i has diagonal entry */
1821     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1822 
1823     current_space->array           += nzi;
1824     current_space->local_used      += nzi;
1825     current_space->local_remaining -= nzi;
1826     current_space_lvl->array           += nzi;
1827     current_space_lvl->local_used      += nzi;
1828     current_space_lvl->local_remaining -= nzi;
1829   }
1830 
1831   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1832   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1833 
1834   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1835   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1836   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1837 
1838   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1839   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1840   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1841 
1842 #if defined(PETSC_USE_INFO)
1843   {
1844     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1845     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1846     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1847     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1848     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1849     if (diagonal_fill) {
1850       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1851     }
1852   }
1853 #endif
1854 
1855   /* put together the new matrix */
1856   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1857   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1858   b = (Mat_SeqAIJ*)(fact)->data;
1859   b->free_a       = PETSC_TRUE;
1860   b->free_ij      = PETSC_TRUE;
1861   b->singlemalloc = PETSC_FALSE;
1862   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1863   b->j          = bj;
1864   b->i          = bi;
1865   b->diag       = bdiag;
1866   b->ilen       = 0;
1867   b->imax       = 0;
1868   b->row        = isrow;
1869   b->col        = iscol;
1870   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1871   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1872   b->icol       = isicol;
1873   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1874   /* In b structure:  Free imax, ilen, old a, old j.
1875      Allocate bdiag, solve_work, new a, new j */
1876   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1877   b->maxnz = b->nz = bdiag[0]+1;
1878   (fact)->info.factor_mallocs    = reallocs;
1879   (fact)->info.fill_ratio_given  = f;
1880   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1881   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1882   if (a->inode.size) {
1883     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1884   }
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1890 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1891 {
1892   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1893   IS                 isicol;
1894   PetscErrorCode     ierr;
1895   const PetscInt     *r,*ic;
1896   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1897   PetscInt           *bi,*cols,nnz,*cols_lvl;
1898   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1899   PetscInt           i,levels,diagonal_fill;
1900   PetscBool          col_identity,row_identity;
1901   PetscReal          f;
1902   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1903   PetscBT            lnkbt;
1904   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1905   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1906   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1907   PetscBool          missing;
1908 
1909   PetscFunctionBegin;
1910   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1911   f             = info->fill;
1912   levels        = (PetscInt)info->levels;
1913   diagonal_fill = (PetscInt)info->diagonal_fill;
1914   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1915 
1916   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1917   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1918   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1919     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1920     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1921     if (a->inode.size) {
1922       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1923     }
1924     fact->factortype = MAT_FACTOR_ILU;
1925     (fact)->info.factor_mallocs    = 0;
1926     (fact)->info.fill_ratio_given  = info->fill;
1927     (fact)->info.fill_ratio_needed = 1.0;
1928     b               = (Mat_SeqAIJ*)(fact)->data;
1929     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1930     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1931     b->row              = isrow;
1932     b->col              = iscol;
1933     b->icol             = isicol;
1934     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1935     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1936     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1937     PetscFunctionReturn(0);
1938   }
1939 
1940   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1941   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1942 
1943   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1944   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1945   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1946   bi[0] = bdiag[0] = 0;
1947 
1948   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1949 
1950   /* create a linked list for storing column indices of the active row */
1951   nlnk = n + 1;
1952   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1953 
1954   /* initial FreeSpace size is f*(ai[n]+1) */
1955   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1956   current_space = free_space;
1957   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1958   current_space_lvl = free_space_lvl;
1959 
1960   for (i=0; i<n; i++) {
1961     nzi = 0;
1962     /* copy current row into linked list */
1963     nnz  = ai[r[i]+1] - ai[r[i]];
1964     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1965     cols = aj + ai[r[i]];
1966     lnk[i] = -1; /* marker to indicate if diagonal exists */
1967     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1968     nzi += nlnk;
1969 
1970     /* make sure diagonal entry is included */
1971     if (diagonal_fill && lnk[i] == -1) {
1972       fm = n;
1973       while (lnk[fm] < i) fm = lnk[fm];
1974       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1975       lnk[fm]    = i;
1976       lnk_lvl[i] = 0;
1977       nzi++; dcount++;
1978     }
1979 
1980     /* add pivot rows into the active row */
1981     nzbd = 0;
1982     prow = lnk[n];
1983     while (prow < i) {
1984       nnz      = bdiag[prow];
1985       cols     = bj_ptr[prow] + nnz + 1;
1986       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1987       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1988       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1989       nzi += nlnk;
1990       prow = lnk[prow];
1991       nzbd++;
1992     }
1993     bdiag[i] = nzbd;
1994     bi[i+1]  = bi[i] + nzi;
1995 
1996     /* if free space is not available, make more free space */
1997     if (current_space->local_remaining<nzi) {
1998       nnz = nzi*(n - i); /* estimated and max additional space needed */
1999       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2000       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2001       reallocs++;
2002     }
2003 
2004     /* copy data into free_space and free_space_lvl, then initialize lnk */
2005     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2006     bj_ptr[i]    = current_space->array;
2007     bjlvl_ptr[i] = current_space_lvl->array;
2008 
2009     /* make sure the active row i has diagonal entry */
2010     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2011 
2012     current_space->array           += nzi;
2013     current_space->local_used      += nzi;
2014     current_space->local_remaining -= nzi;
2015     current_space_lvl->array           += nzi;
2016     current_space_lvl->local_used      += nzi;
2017     current_space_lvl->local_remaining -= nzi;
2018   }
2019 
2020   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2021   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2022 
2023   /* destroy list of free space and other temporary arrays */
2024   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2025   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2026   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2027   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2028   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2029 
2030 #if defined(PETSC_USE_INFO)
2031   {
2032     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2033     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2034     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2035     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2036     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2037     if (diagonal_fill) {
2038       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2039     }
2040   }
2041 #endif
2042 
2043   /* put together the new matrix */
2044   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2045   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2046   b = (Mat_SeqAIJ*)(fact)->data;
2047   b->free_a       = PETSC_TRUE;
2048   b->free_ij      = PETSC_TRUE;
2049   b->singlemalloc = PETSC_FALSE;
2050   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2051   b->j          = bj;
2052   b->i          = bi;
2053   for (i=0; i<n; i++) bdiag[i] += bi[i];
2054   b->diag       = bdiag;
2055   b->ilen       = 0;
2056   b->imax       = 0;
2057   b->row        = isrow;
2058   b->col        = iscol;
2059   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2060   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2061   b->icol       = isicol;
2062   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2063   /* In b structure:  Free imax, ilen, old a, old j.
2064      Allocate bdiag, solve_work, new a, new j */
2065   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2066   b->maxnz             = b->nz = bi[n] ;
2067   (fact)->info.factor_mallocs    = reallocs;
2068   (fact)->info.fill_ratio_given  = f;
2069   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2070   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2071   if (a->inode.size) {
2072     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2073   }
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2079 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2080 {
2081   Mat            C = B;
2082   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2083   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2084   IS             ip=b->row,iip = b->icol;
2085   PetscErrorCode ierr;
2086   const PetscInt *rip,*riip;
2087   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2088   PetscInt       *ai=a->i,*aj=a->j;
2089   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2090   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2091   PetscBool      perm_identity;
2092   FactorShiftCtx sctx;
2093   PetscReal      rs;
2094   MatScalar      d,*v;
2095 
2096   PetscFunctionBegin;
2097   /* MatPivotSetUp(): initialize shift context sctx */
2098   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2099 
2100   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2101     sctx.shift_top = info->zeropivot;
2102     for (i=0; i<mbs; i++) {
2103       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2104       d  = (aa)[a->diag[i]];
2105       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2106       v  = aa+ai[i];
2107       nz = ai[i+1] - ai[i];
2108       for (j=0; j<nz; j++)
2109 	rs += PetscAbsScalar(v[j]);
2110       if (rs>sctx.shift_top) sctx.shift_top = rs;
2111     }
2112     sctx.shift_top   *= 1.1;
2113     sctx.nshift_max   = 5;
2114     sctx.shift_lo     = 0.;
2115     sctx.shift_hi     = 1.;
2116   }
2117 
2118   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2119   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2120 
2121   /* allocate working arrays
2122      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2123      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2124   */
2125   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2126 
2127   do {
2128     sctx.newshift = PETSC_FALSE;
2129 
2130     for (i=0; i<mbs; i++) c2r[i] = mbs;
2131     il[0] = 0;
2132 
2133     for (k = 0; k<mbs; k++){
2134       /* zero rtmp */
2135       nz = bi[k+1] - bi[k];
2136       bjtmp = bj + bi[k];
2137       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2138 
2139       /* load in initial unfactored row */
2140       bval = ba + bi[k];
2141       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2142       for (j = jmin; j < jmax; j++){
2143         col = riip[aj[j]];
2144         if (col >= k){ /* only take upper triangular entry */
2145           rtmp[col] = aa[j];
2146           *bval++   = 0.0; /* for in-place factorization */
2147         }
2148       }
2149       /* shift the diagonal of the matrix: ZeropivotApply() */
2150       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2151 
2152       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2153       dk = rtmp[k];
2154       i  = c2r[k]; /* first row to be added to k_th row  */
2155 
2156       while (i < k){
2157         nexti = c2r[i]; /* next row to be added to k_th row */
2158 
2159         /* compute multiplier, update diag(k) and U(i,k) */
2160         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2161         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2162         dk   += uikdi*ba[ili]; /* update diag[k] */
2163         ba[ili] = uikdi; /* -U(i,k) */
2164 
2165         /* add multiple of row i to k-th row */
2166         jmin = ili + 1; jmax = bi[i+1];
2167         if (jmin < jmax){
2168           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2169           /* update il and c2r for row i */
2170           il[i] = jmin;
2171           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2172         }
2173         i = nexti;
2174       }
2175 
2176       /* copy data into U(k,:) */
2177       rs   = 0.0;
2178       jmin = bi[k]; jmax = bi[k+1]-1;
2179       if (jmin < jmax) {
2180         for (j=jmin; j<jmax; j++){
2181           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2182         }
2183         /* add the k-th row into il and c2r */
2184         il[k] = jmin;
2185         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2186       }
2187 
2188       /* MatPivotCheck() */
2189       sctx.rs  = rs;
2190       sctx.pv  = dk;
2191       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2192       if(sctx.newshift) break;
2193       dk = sctx.pv;
2194 
2195       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2196     }
2197   } while (sctx.newshift);
2198 
2199   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2200   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2201   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2202 
2203   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2204   if (perm_identity){
2205     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2206     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2207     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2208     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2209   } else {
2210     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2211     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2212     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2213     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2214   }
2215 
2216   C->assembled    = PETSC_TRUE;
2217   C->preallocated = PETSC_TRUE;
2218   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2219 
2220   /* MatPivotView() */
2221   if (sctx.nshift){
2222     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2223       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2224     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2225       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2226     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2227       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2228     }
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2235 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2236 {
2237   Mat            C = B;
2238   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2239   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2240   IS             ip=b->row,iip = b->icol;
2241   PetscErrorCode ierr;
2242   const PetscInt *rip,*riip;
2243   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2244   PetscInt       *ai=a->i,*aj=a->j;
2245   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2246   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2247   PetscBool      perm_identity;
2248   FactorShiftCtx sctx;
2249   PetscReal      rs;
2250   MatScalar      d,*v;
2251 
2252   PetscFunctionBegin;
2253   /* MatPivotSetUp(): initialize shift context sctx */
2254   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2255 
2256   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2257     sctx.shift_top = info->zeropivot;
2258     for (i=0; i<mbs; i++) {
2259       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2260       d  = (aa)[a->diag[i]];
2261       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2262       v  = aa+ai[i];
2263       nz = ai[i+1] - ai[i];
2264       for (j=0; j<nz; j++)
2265 	rs += PetscAbsScalar(v[j]);
2266       if (rs>sctx.shift_top) sctx.shift_top = rs;
2267     }
2268     sctx.shift_top   *= 1.1;
2269     sctx.nshift_max   = 5;
2270     sctx.shift_lo     = 0.;
2271     sctx.shift_hi     = 1.;
2272   }
2273 
2274   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2275   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2276 
2277   /* initialization */
2278   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2279 
2280   do {
2281     sctx.newshift = PETSC_FALSE;
2282 
2283     for (i=0; i<mbs; i++) jl[i] = mbs;
2284     il[0] = 0;
2285 
2286     for (k = 0; k<mbs; k++){
2287       /* zero rtmp */
2288       nz = bi[k+1] - bi[k];
2289       bjtmp = bj + bi[k];
2290       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2291 
2292       bval = ba + bi[k];
2293       /* initialize k-th row by the perm[k]-th row of A */
2294       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2295       for (j = jmin; j < jmax; j++){
2296         col = riip[aj[j]];
2297         if (col >= k){ /* only take upper triangular entry */
2298           rtmp[col] = aa[j];
2299           *bval++  = 0.0; /* for in-place factorization */
2300         }
2301       }
2302       /* shift the diagonal of the matrix */
2303       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2304 
2305       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2306       dk = rtmp[k];
2307       i = jl[k]; /* first row to be added to k_th row  */
2308 
2309       while (i < k){
2310         nexti = jl[i]; /* next row to be added to k_th row */
2311 
2312         /* compute multiplier, update diag(k) and U(i,k) */
2313         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2314         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2315         dk += uikdi*ba[ili];
2316         ba[ili] = uikdi; /* -U(i,k) */
2317 
2318         /* add multiple of row i to k-th row */
2319         jmin = ili + 1; jmax = bi[i+1];
2320         if (jmin < jmax){
2321           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2322           /* update il and jl for row i */
2323           il[i] = jmin;
2324           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2325         }
2326         i = nexti;
2327       }
2328 
2329       /* shift the diagonals when zero pivot is detected */
2330       /* compute rs=sum of abs(off-diagonal) */
2331       rs   = 0.0;
2332       jmin = bi[k]+1;
2333       nz   = bi[k+1] - jmin;
2334       bcol = bj + jmin;
2335       for (j=0; j<nz; j++) {
2336         rs += PetscAbsScalar(rtmp[bcol[j]]);
2337       }
2338 
2339       sctx.rs = rs;
2340       sctx.pv = dk;
2341       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2342       if (sctx.newshift) break;
2343       dk = sctx.pv;
2344 
2345       /* copy data into U(k,:) */
2346       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2347       jmin = bi[k]+1; jmax = bi[k+1];
2348       if (jmin < jmax) {
2349         for (j=jmin; j<jmax; j++){
2350           col = bj[j]; ba[j] = rtmp[col];
2351         }
2352         /* add the k-th row into il and jl */
2353         il[k] = jmin;
2354         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2355       }
2356     }
2357   } while (sctx.newshift);
2358 
2359   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2360   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2361   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2362 
2363   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2364   if (perm_identity){
2365     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369   } else {
2370     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2371     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2372     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2373     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2374   }
2375 
2376   C->assembled    = PETSC_TRUE;
2377   C->preallocated = PETSC_TRUE;
2378   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2379   if (sctx.nshift){
2380     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2381       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2382     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2383       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2384     }
2385   }
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 /*
2390    icc() under revised new data structure.
2391    Factored arrays bj and ba are stored as
2392      U(0,:),...,U(i,:),U(n-1,:)
2393 
2394    ui=fact->i is an array of size n+1, in which
2395    ui+
2396      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2397      ui[n]:  points to U(n-1,n-1)+1
2398 
2399   udiag=fact->diag is an array of size n,in which
2400      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2401 
2402    U(i,:) contains udiag[i] as its last entry, i.e.,
2403     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2404 */
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2408 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2409 {
2410   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2411   Mat_SeqSBAIJ       *b;
2412   PetscErrorCode     ierr;
2413   PetscBool          perm_identity,missing;
2414   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2415   const PetscInt     *rip,*riip;
2416   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2417   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2418   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2419   PetscReal          fill=info->fill,levels=info->levels;
2420   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2421   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2422   PetscBT            lnkbt;
2423   IS                 iperm;
2424 
2425   PetscFunctionBegin;
2426   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2427   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2428   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2429   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2430   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2431 
2432   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2433   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2434   ui[0] = 0;
2435 
2436   /* ICC(0) without matrix ordering: simply rearrange column indices */
2437   if (!levels && perm_identity) {
2438     for (i=0; i<am; i++) {
2439       ncols    = ai[i+1] - a->diag[i];
2440       ui[i+1]  = ui[i] + ncols;
2441       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2442     }
2443     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2444     cols = uj;
2445     for (i=0; i<am; i++) {
2446       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2447       ncols = ai[i+1] - a->diag[i] -1;
2448       for (j=0; j<ncols; j++) *cols++ = aj[j];
2449       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2450     }
2451   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2452     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2453     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2454 
2455     /* initialization */
2456     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2457 
2458     /* jl: linked list for storing indices of the pivot rows
2459        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2460     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2461     for (i=0; i<am; i++){
2462       jl[i] = am; il[i] = 0;
2463     }
2464 
2465     /* create and initialize a linked list for storing column indices of the active row k */
2466     nlnk = am + 1;
2467     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2468 
2469     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2470     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2471     current_space = free_space;
2472     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2473     current_space_lvl = free_space_lvl;
2474 
2475     for (k=0; k<am; k++){  /* for each active row k */
2476       /* initialize lnk by the column indices of row rip[k] of A */
2477       nzk   = 0;
2478       ncols = ai[rip[k]+1] - ai[rip[k]];
2479       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2480       ncols_upper = 0;
2481       for (j=0; j<ncols; j++){
2482         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2483         if (riip[i] >= k){ /* only take upper triangular entry */
2484           ajtmp[ncols_upper] = i;
2485           ncols_upper++;
2486         }
2487       }
2488       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2489       nzk += nlnk;
2490 
2491       /* update lnk by computing fill-in for each pivot row to be merged in */
2492       prow = jl[k]; /* 1st pivot row */
2493 
2494       while (prow < k){
2495         nextprow = jl[prow];
2496 
2497         /* merge prow into k-th row */
2498         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2499         jmax = ui[prow+1];
2500         ncols = jmax-jmin;
2501         i     = jmin - ui[prow];
2502         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2503         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2504         j     = *(uj - 1);
2505         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2506         nzk += nlnk;
2507 
2508         /* update il and jl for prow */
2509         if (jmin < jmax){
2510           il[prow] = jmin;
2511           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2512         }
2513         prow = nextprow;
2514       }
2515 
2516       /* if free space is not available, make more free space */
2517       if (current_space->local_remaining<nzk) {
2518         i  = am - k + 1; /* num of unfactored rows */
2519         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2520         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2521         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2522         reallocs++;
2523       }
2524 
2525       /* copy data into free_space and free_space_lvl, then initialize lnk */
2526       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2527       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2528 
2529       /* add the k-th row into il and jl */
2530       if (nzk > 1){
2531         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2532         jl[k] = jl[i]; jl[i] = k;
2533         il[k] = ui[k] + 1;
2534       }
2535       uj_ptr[k]     = current_space->array;
2536       uj_lvl_ptr[k] = current_space_lvl->array;
2537 
2538       current_space->array           += nzk;
2539       current_space->local_used      += nzk;
2540       current_space->local_remaining -= nzk;
2541 
2542       current_space_lvl->array           += nzk;
2543       current_space_lvl->local_used      += nzk;
2544       current_space_lvl->local_remaining -= nzk;
2545 
2546       ui[k+1] = ui[k] + nzk;
2547     }
2548 
2549     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2550     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2551     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2552     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2553 
2554     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2555     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2556     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2557     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2558     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2559 
2560   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2561 
2562   /* put together the new matrix in MATSEQSBAIJ format */
2563   b    = (Mat_SeqSBAIJ*)(fact)->data;
2564   b->singlemalloc = PETSC_FALSE;
2565   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2566   b->j    = uj;
2567   b->i    = ui;
2568   b->diag = udiag;
2569   b->free_diag = PETSC_TRUE;
2570   b->ilen = 0;
2571   b->imax = 0;
2572   b->row  = perm;
2573   b->col  = perm;
2574   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2575   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2576   b->icol = iperm;
2577   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2578   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2579   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2580   b->maxnz   = b->nz = ui[am];
2581   b->free_a  = PETSC_TRUE;
2582   b->free_ij = PETSC_TRUE;
2583 
2584   fact->info.factor_mallocs   = reallocs;
2585   fact->info.fill_ratio_given = fill;
2586   if (ai[am] != 0) {
2587     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2588     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2589   } else {
2590     fact->info.fill_ratio_needed = 0.0;
2591   }
2592 #if defined(PETSC_USE_INFO)
2593     if (ai[am] != 0) {
2594       PetscReal af = fact->info.fill_ratio_needed;
2595       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2596       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2597       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2598     } else {
2599       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2600     }
2601 #endif
2602   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 #undef __FUNCT__
2607 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2608 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2609 {
2610   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2611   Mat_SeqSBAIJ       *b;
2612   PetscErrorCode     ierr;
2613   PetscBool          perm_identity,missing;
2614   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2615   const PetscInt     *rip,*riip;
2616   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2617   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2618   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2619   PetscReal          fill=info->fill,levels=info->levels;
2620   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2621   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2622   PetscBT            lnkbt;
2623   IS                 iperm;
2624 
2625   PetscFunctionBegin;
2626   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2627   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2628   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2629   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2630   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2631 
2632   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2633   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2634   ui[0] = 0;
2635 
2636   /* ICC(0) without matrix ordering: simply copies fill pattern */
2637   if (!levels && perm_identity) {
2638 
2639     for (i=0; i<am; i++) {
2640       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2641       udiag[i] = ui[i];
2642     }
2643     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2644     cols = uj;
2645     for (i=0; i<am; i++) {
2646       aj    = a->j + a->diag[i];
2647       ncols = ui[i+1] - ui[i];
2648       for (j=0; j<ncols; j++) *cols++ = *aj++;
2649     }
2650   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2651     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2652     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2653 
2654     /* initialization */
2655     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2656 
2657     /* jl: linked list for storing indices of the pivot rows
2658        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2659     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2660     for (i=0; i<am; i++){
2661       jl[i] = am; il[i] = 0;
2662     }
2663 
2664     /* create and initialize a linked list for storing column indices of the active row k */
2665     nlnk = am + 1;
2666     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2667 
2668     /* initial FreeSpace size is fill*(ai[am]+1) */
2669     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2670     current_space = free_space;
2671     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2672     current_space_lvl = free_space_lvl;
2673 
2674     for (k=0; k<am; k++){  /* for each active row k */
2675       /* initialize lnk by the column indices of row rip[k] of A */
2676       nzk   = 0;
2677       ncols = ai[rip[k]+1] - ai[rip[k]];
2678       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2679       ncols_upper = 0;
2680       for (j=0; j<ncols; j++){
2681         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2682         if (riip[i] >= k){ /* only take upper triangular entry */
2683           ajtmp[ncols_upper] = i;
2684           ncols_upper++;
2685         }
2686       }
2687       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2688       nzk += nlnk;
2689 
2690       /* update lnk by computing fill-in for each pivot row to be merged in */
2691       prow = jl[k]; /* 1st pivot row */
2692 
2693       while (prow < k){
2694         nextprow = jl[prow];
2695 
2696         /* merge prow into k-th row */
2697         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2698         jmax = ui[prow+1];
2699         ncols = jmax-jmin;
2700         i     = jmin - ui[prow];
2701         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2702         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2703         j     = *(uj - 1);
2704         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2705         nzk += nlnk;
2706 
2707         /* update il and jl for prow */
2708         if (jmin < jmax){
2709           il[prow] = jmin;
2710           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2711         }
2712         prow = nextprow;
2713       }
2714 
2715       /* if free space is not available, make more free space */
2716       if (current_space->local_remaining<nzk) {
2717         i = am - k + 1; /* num of unfactored rows */
2718         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2719         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2720         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2721         reallocs++;
2722       }
2723 
2724       /* copy data into free_space and free_space_lvl, then initialize lnk */
2725       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2726       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2727 
2728       /* add the k-th row into il and jl */
2729       if (nzk > 1){
2730         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2731         jl[k] = jl[i]; jl[i] = k;
2732         il[k] = ui[k] + 1;
2733       }
2734       uj_ptr[k]     = current_space->array;
2735       uj_lvl_ptr[k] = current_space_lvl->array;
2736 
2737       current_space->array           += nzk;
2738       current_space->local_used      += nzk;
2739       current_space->local_remaining -= nzk;
2740 
2741       current_space_lvl->array           += nzk;
2742       current_space_lvl->local_used      += nzk;
2743       current_space_lvl->local_remaining -= nzk;
2744 
2745       ui[k+1] = ui[k] + nzk;
2746     }
2747 
2748 #if defined(PETSC_USE_INFO)
2749     if (ai[am] != 0) {
2750       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2751       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2752       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2753       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2754     } else {
2755       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2756     }
2757 #endif
2758 
2759     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2760     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2761     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2762     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2763 
2764     /* destroy list of free space and other temporary array(s) */
2765     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2766     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2767     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2768     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2769 
2770   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2771 
2772   /* put together the new matrix in MATSEQSBAIJ format */
2773 
2774   b    = (Mat_SeqSBAIJ*)fact->data;
2775   b->singlemalloc = PETSC_FALSE;
2776   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2777   b->j    = uj;
2778   b->i    = ui;
2779   b->diag = udiag;
2780   b->free_diag = PETSC_TRUE;
2781   b->ilen = 0;
2782   b->imax = 0;
2783   b->row  = perm;
2784   b->col  = perm;
2785   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2786   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2787   b->icol = iperm;
2788   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2789   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2790   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2791   b->maxnz   = b->nz = ui[am];
2792   b->free_a  = PETSC_TRUE;
2793   b->free_ij = PETSC_TRUE;
2794 
2795   fact->info.factor_mallocs    = reallocs;
2796   fact->info.fill_ratio_given  = fill;
2797   if (ai[am] != 0) {
2798     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2799   } else {
2800     fact->info.fill_ratio_needed = 0.0;
2801   }
2802   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2808 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2809 {
2810   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2811   Mat_SeqSBAIJ       *b;
2812   PetscErrorCode     ierr;
2813   PetscBool          perm_identity;
2814   PetscReal          fill = info->fill;
2815   const PetscInt     *rip,*riip;
2816   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2817   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2818   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2819   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2820   PetscBT            lnkbt;
2821   IS                 iperm;
2822 
2823   PetscFunctionBegin;
2824   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2825   /* check whether perm is the identity mapping */
2826   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2827   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2828   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2829   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2830 
2831   /* initialization */
2832   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2833   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2834   ui[0] = 0;
2835 
2836   /* jl: linked list for storing indices of the pivot rows
2837      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2838   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2839   for (i=0; i<am; i++){
2840     jl[i] = am; il[i] = 0;
2841   }
2842 
2843   /* create and initialize a linked list for storing column indices of the active row k */
2844   nlnk = am + 1;
2845   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2846 
2847   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2848   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2849   current_space = free_space;
2850 
2851   for (k=0; k<am; k++){  /* for each active row k */
2852     /* initialize lnk by the column indices of row rip[k] of A */
2853     nzk   = 0;
2854     ncols = ai[rip[k]+1] - ai[rip[k]];
2855     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2856     ncols_upper = 0;
2857     for (j=0; j<ncols; j++){
2858       i = riip[*(aj + ai[rip[k]] + j)];
2859       if (i >= k){ /* only take upper triangular entry */
2860         cols[ncols_upper] = i;
2861         ncols_upper++;
2862       }
2863     }
2864     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2865     nzk += nlnk;
2866 
2867     /* update lnk by computing fill-in for each pivot row to be merged in */
2868     prow = jl[k]; /* 1st pivot row */
2869 
2870     while (prow < k){
2871       nextprow = jl[prow];
2872       /* merge prow into k-th row */
2873       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2874       jmax = ui[prow+1];
2875       ncols = jmax-jmin;
2876       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2877       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2878       nzk += nlnk;
2879 
2880       /* update il and jl for prow */
2881       if (jmin < jmax){
2882         il[prow] = jmin;
2883         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2884       }
2885       prow = nextprow;
2886     }
2887 
2888     /* if free space is not available, make more free space */
2889     if (current_space->local_remaining<nzk) {
2890       i  = am - k + 1; /* num of unfactored rows */
2891       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2892       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2893       reallocs++;
2894     }
2895 
2896     /* copy data into free space, then initialize lnk */
2897     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2898 
2899     /* add the k-th row into il and jl */
2900     if (nzk > 1){
2901       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2902       jl[k] = jl[i]; jl[i] = k;
2903       il[k] = ui[k] + 1;
2904     }
2905     ui_ptr[k] = current_space->array;
2906     current_space->array           += nzk;
2907     current_space->local_used      += nzk;
2908     current_space->local_remaining -= nzk;
2909 
2910     ui[k+1] = ui[k] + nzk;
2911   }
2912 
2913   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2914   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2915   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2916 
2917   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2918   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2919   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2920   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2921 
2922   /* put together the new matrix in MATSEQSBAIJ format */
2923 
2924   b = (Mat_SeqSBAIJ*)fact->data;
2925   b->singlemalloc = PETSC_FALSE;
2926   b->free_a       = PETSC_TRUE;
2927   b->free_ij      = PETSC_TRUE;
2928   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2929   b->j    = uj;
2930   b->i    = ui;
2931   b->diag = udiag;
2932   b->free_diag = PETSC_TRUE;
2933   b->ilen = 0;
2934   b->imax = 0;
2935   b->row  = perm;
2936   b->col  = perm;
2937   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2938   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2939   b->icol = iperm;
2940   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2941   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2942   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2943   b->maxnz = b->nz = ui[am];
2944 
2945   fact->info.factor_mallocs    = reallocs;
2946   fact->info.fill_ratio_given  = fill;
2947   if (ai[am] != 0) {
2948     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2949     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2950   } else {
2951     fact->info.fill_ratio_needed = 0.0;
2952   }
2953 #if defined(PETSC_USE_INFO)
2954   if (ai[am] != 0) {
2955     PetscReal af = fact->info.fill_ratio_needed;
2956     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2957     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2958     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2959   } else {
2960      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2961   }
2962 #endif
2963   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2969 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2970 {
2971   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2972   Mat_SeqSBAIJ       *b;
2973   PetscErrorCode     ierr;
2974   PetscBool          perm_identity;
2975   PetscReal          fill = info->fill;
2976   const PetscInt     *rip,*riip;
2977   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2978   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2979   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2980   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2981   PetscBT            lnkbt;
2982   IS                 iperm;
2983 
2984   PetscFunctionBegin;
2985   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2986   /* check whether perm is the identity mapping */
2987   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2988   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2989   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2990   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2991 
2992   /* initialization */
2993   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2994   ui[0] = 0;
2995 
2996   /* jl: linked list for storing indices of the pivot rows
2997      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2998   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2999   for (i=0; i<am; i++){
3000     jl[i] = am; il[i] = 0;
3001   }
3002 
3003   /* create and initialize a linked list for storing column indices of the active row k */
3004   nlnk = am + 1;
3005   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3006 
3007   /* initial FreeSpace size is fill*(ai[am]+1) */
3008   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3009   current_space = free_space;
3010 
3011   for (k=0; k<am; k++){  /* for each active row k */
3012     /* initialize lnk by the column indices of row rip[k] of A */
3013     nzk   = 0;
3014     ncols = ai[rip[k]+1] - ai[rip[k]];
3015     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3016     ncols_upper = 0;
3017     for (j=0; j<ncols; j++){
3018       i = riip[*(aj + ai[rip[k]] + j)];
3019       if (i >= k){ /* only take upper triangular entry */
3020         cols[ncols_upper] = i;
3021         ncols_upper++;
3022       }
3023     }
3024     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3025     nzk += nlnk;
3026 
3027     /* update lnk by computing fill-in for each pivot row to be merged in */
3028     prow = jl[k]; /* 1st pivot row */
3029 
3030     while (prow < k){
3031       nextprow = jl[prow];
3032       /* merge prow into k-th row */
3033       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3034       jmax = ui[prow+1];
3035       ncols = jmax-jmin;
3036       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3037       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3038       nzk += nlnk;
3039 
3040       /* update il and jl for prow */
3041       if (jmin < jmax){
3042         il[prow] = jmin;
3043         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3044       }
3045       prow = nextprow;
3046     }
3047 
3048     /* if free space is not available, make more free space */
3049     if (current_space->local_remaining<nzk) {
3050       i = am - k + 1; /* num of unfactored rows */
3051       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3052       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3053       reallocs++;
3054     }
3055 
3056     /* copy data into free space, then initialize lnk */
3057     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3058 
3059     /* add the k-th row into il and jl */
3060     if (nzk-1 > 0){
3061       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3062       jl[k] = jl[i]; jl[i] = k;
3063       il[k] = ui[k] + 1;
3064     }
3065     ui_ptr[k] = current_space->array;
3066     current_space->array           += nzk;
3067     current_space->local_used      += nzk;
3068     current_space->local_remaining -= nzk;
3069 
3070     ui[k+1] = ui[k] + nzk;
3071   }
3072 
3073 #if defined(PETSC_USE_INFO)
3074   if (ai[am] != 0) {
3075     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3076     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3077     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3078     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3079   } else {
3080      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3081   }
3082 #endif
3083 
3084   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3085   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3086   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3087 
3088   /* destroy list of free space and other temporary array(s) */
3089   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3090   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3091   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3092 
3093   /* put together the new matrix in MATSEQSBAIJ format */
3094 
3095   b = (Mat_SeqSBAIJ*)fact->data;
3096   b->singlemalloc = PETSC_FALSE;
3097   b->free_a       = PETSC_TRUE;
3098   b->free_ij      = PETSC_TRUE;
3099   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3100   b->j    = uj;
3101   b->i    = ui;
3102   b->diag = 0;
3103   b->ilen = 0;
3104   b->imax = 0;
3105   b->row  = perm;
3106   b->col  = perm;
3107   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3108   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3109   b->icol = iperm;
3110   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3111   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3112   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3113   b->maxnz = b->nz = ui[am];
3114 
3115   fact->info.factor_mallocs    = reallocs;
3116   fact->info.fill_ratio_given  = fill;
3117   if (ai[am] != 0) {
3118     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3119   } else {
3120     fact->info.fill_ratio_needed = 0.0;
3121   }
3122   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3123   PetscFunctionReturn(0);
3124 }
3125 
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3128 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3129 {
3130   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3131   PetscErrorCode    ierr;
3132   PetscInt          n = A->rmap->n;
3133   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3134   PetscScalar       *x,sum;
3135   const PetscScalar *b;
3136   const MatScalar   *aa = a->a,*v;
3137   PetscInt          i,nz;
3138 
3139   PetscFunctionBegin;
3140   if (!n) PetscFunctionReturn(0);
3141 
3142   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3143   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3144 
3145   /* forward solve the lower triangular */
3146   x[0] = b[0];
3147   v    = aa;
3148   vi   = aj;
3149   for (i=1; i<n; i++) {
3150     nz  = ai[i+1] - ai[i];
3151     sum = b[i];
3152     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3153     v  += nz;
3154     vi += nz;
3155     x[i] = sum;
3156   }
3157 
3158   /* backward solve the upper triangular */
3159   for (i=n-1; i>=0; i--){
3160     v   = aa + adiag[i+1] + 1;
3161     vi  = aj + adiag[i+1] + 1;
3162     nz = adiag[i] - adiag[i+1]-1;
3163     sum = x[i];
3164     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3165     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3166   }
3167 
3168   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3169   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3170   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatSolve_SeqAIJ"
3176 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3177 {
3178   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3179   IS                iscol = a->col,isrow = a->row;
3180   PetscErrorCode    ierr;
3181   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3182   const PetscInt    *rout,*cout,*r,*c;
3183   PetscScalar       *x,*tmp,sum;
3184   const PetscScalar *b;
3185   const MatScalar   *aa = a->a,*v;
3186 
3187   PetscFunctionBegin;
3188   if (!n) PetscFunctionReturn(0);
3189 
3190   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3191   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3192   tmp  = a->solve_work;
3193 
3194   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3195   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3196 
3197   /* forward solve the lower triangular */
3198   tmp[0] = b[r[0]];
3199   v      = aa;
3200   vi     = aj;
3201   for (i=1; i<n; i++) {
3202     nz  = ai[i+1] - ai[i];
3203     sum = b[r[i]];
3204     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3205     tmp[i] = sum;
3206     v += nz; vi += nz;
3207   }
3208 
3209   /* backward solve the upper triangular */
3210   for (i=n-1; i>=0; i--){
3211     v   = aa + adiag[i+1]+1;
3212     vi  = aj + adiag[i+1]+1;
3213     nz  = adiag[i]-adiag[i+1]-1;
3214     sum = tmp[i];
3215     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3216     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3217   }
3218 
3219   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3220   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3221   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3222   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3223   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3229 /*
3230     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3231 */
3232 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3233 {
3234   Mat                B = *fact;
3235   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3236   IS                 isicol;
3237   PetscErrorCode     ierr;
3238   const PetscInt     *r,*ic;
3239   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3240   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3241   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3242   PetscInt           nlnk,*lnk;
3243   PetscBT            lnkbt;
3244   PetscBool          row_identity,icol_identity;
3245   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3246   const PetscInt     *ics;
3247   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3248   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3249   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3250   PetscBool          missing;
3251 
3252   PetscFunctionBegin;
3253 
3254   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3255   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3256   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3257 
3258   /* ------- symbolic factorization, can be reused ---------*/
3259   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3260   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3261   adiag=a->diag;
3262 
3263   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3264 
3265   /* bdiag is location of diagonal in factor */
3266   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3267   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3268 
3269   /* allocate row pointers bi */
3270   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3271 
3272   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3273   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3274   nnz_max  = ai[n]+2*n*dtcount+2;
3275 
3276   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3277   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3278 
3279   /* put together the new matrix */
3280   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3281   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3282   b    = (Mat_SeqAIJ*)B->data;
3283   b->free_a       = PETSC_TRUE;
3284   b->free_ij      = PETSC_TRUE;
3285   b->singlemalloc = PETSC_FALSE;
3286   b->a          = ba;
3287   b->j          = bj;
3288   b->i          = bi;
3289   b->diag       = bdiag;
3290   b->ilen       = 0;
3291   b->imax       = 0;
3292   b->row        = isrow;
3293   b->col        = iscol;
3294   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3295   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3296   b->icol       = isicol;
3297   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3298 
3299   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3300   b->maxnz = nnz_max;
3301 
3302   B->factortype            = MAT_FACTOR_ILUDT;
3303   B->info.factor_mallocs   = 0;
3304   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3305   CHKMEMQ;
3306   /* ------- end of symbolic factorization ---------*/
3307 
3308   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3309   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3310   ics  = ic;
3311 
3312   /* linked list for storing column indices of the active row */
3313   nlnk = n + 1;
3314   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3315 
3316   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3317   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3318   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3319   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3320   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3321 
3322   bi[0]    = 0;
3323   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3324   bdiag_rev[n] = bdiag[0];
3325   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3326   for (i=0; i<n; i++) {
3327     /* copy initial fill into linked list */
3328     nzi = 0; /* nonzeros for active row i */
3329     nzi = ai[r[i]+1] - ai[r[i]];
3330     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3331     nzi_al = adiag[r[i]] - ai[r[i]];
3332     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3333     ajtmp = aj + ai[r[i]];
3334     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3335 
3336     /* load in initial (unfactored row) */
3337     aatmp = a->a + ai[r[i]];
3338     for (j=0; j<nzi; j++) {
3339       rtmp[ics[*ajtmp++]] = *aatmp++;
3340     }
3341 
3342     /* add pivot rows into linked list */
3343     row = lnk[n];
3344     while (row < i ) {
3345       nzi_bl = bi[row+1] - bi[row] + 1;
3346       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3347       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3348       nzi  += nlnk;
3349       row   = lnk[row];
3350     }
3351 
3352     /* copy data from lnk into jtmp, then initialize lnk */
3353     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3354 
3355     /* numerical factorization */
3356     bjtmp = jtmp;
3357     row   = *bjtmp++; /* 1st pivot row */
3358     while  ( row < i ) {
3359       pc         = rtmp + row;
3360       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3361       multiplier = (*pc) * (*pv);
3362       *pc        = multiplier;
3363       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3364         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3365         pv         = ba + bdiag[row+1] + 1;
3366         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3367         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3368         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3369         ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr);
3370       }
3371       row = *bjtmp++;
3372     }
3373 
3374     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3375     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3376     nzi_bl = 0; j = 0;
3377     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3378       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3379       nzi_bl++; j++;
3380     }
3381     nzi_bu = nzi - nzi_bl -1;
3382     while (j < nzi){
3383       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3384       j++;
3385     }
3386 
3387     bjtmp = bj + bi[i];
3388     batmp = ba + bi[i];
3389     /* apply level dropping rule to L part */
3390     ncut = nzi_al + dtcount;
3391     if (ncut < nzi_bl){
3392       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3393       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3394     } else {
3395       ncut = nzi_bl;
3396     }
3397     for (j=0; j<ncut; j++){
3398       bjtmp[j] = jtmp[j];
3399       batmp[j] = vtmp[j];
3400       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3401     }
3402     bi[i+1] = bi[i] + ncut;
3403     nzi = ncut + 1;
3404 
3405     /* apply level dropping rule to U part */
3406     ncut = nzi_au + dtcount;
3407     if (ncut < nzi_bu){
3408       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3409       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3410     } else {
3411       ncut = nzi_bu;
3412     }
3413     nzi += ncut;
3414 
3415     /* mark bdiagonal */
3416     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3417     bdiag_rev[n-i-1] = bdiag[i+1];
3418     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3419     bjtmp = bj + bdiag[i];
3420     batmp = ba + bdiag[i];
3421     *bjtmp = i;
3422     *batmp = diag_tmp; /* rtmp[i]; */
3423     if (*batmp == 0.0) {
3424       *batmp = dt+shift;
3425       /* printf(" row %d add shift %g\n",i,shift); */
3426     }
3427     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3428     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3429 
3430     bjtmp = bj + bdiag[i+1]+1;
3431     batmp = ba + bdiag[i+1]+1;
3432     for (k=0; k<ncut; k++){
3433       bjtmp[k] = jtmp[nzi_bl+1+k];
3434       batmp[k] = vtmp[nzi_bl+1+k];
3435       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3436     }
3437     /* printf("\n"); */
3438 
3439     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3440     /*
3441     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3442     printf(" ----------------------------\n");
3443     */
3444   } /* for (i=0; i<n; i++) */
3445   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3446   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3447 
3448   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3449   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3450 
3451   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3452   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3453   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3454   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3455 
3456   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3457   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3458 
3459   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3460   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3461   if (row_identity && icol_identity) {
3462     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3463   } else {
3464     B->ops->solve = MatSolve_SeqAIJ;
3465   }
3466 
3467   B->ops->solveadd          = 0;
3468   B->ops->solvetranspose    = 0;
3469   B->ops->solvetransposeadd = 0;
3470   B->ops->matsolve          = 0;
3471   B->assembled              = PETSC_TRUE;
3472   B->preallocated           = PETSC_TRUE;
3473   PetscFunctionReturn(0);
3474 }
3475 
3476 /* a wraper of MatILUDTFactor_SeqAIJ() */
3477 #undef __FUNCT__
3478 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3479 /*
3480     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3481 */
3482 
3483 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3484 {
3485   PetscErrorCode     ierr;
3486 
3487   PetscFunctionBegin;
3488   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 /*
3493    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3494    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3495 */
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3498 /*
3499     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3500 */
3501 
3502 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3503 {
3504   Mat            C=fact;
3505   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3506   IS             isrow = b->row,isicol = b->icol;
3507   PetscErrorCode ierr;
3508   const PetscInt *r,*ic,*ics;
3509   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3510   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3511   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3512   PetscReal      dt=info->dt,shift=info->shiftamount;
3513   PetscBool      row_identity, col_identity;
3514 
3515   PetscFunctionBegin;
3516   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3517   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3518   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3519   ics  = ic;
3520 
3521   for (i=0; i<n; i++){
3522     /* initialize rtmp array */
3523     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3524     bjtmp = bj + bi[i];
3525     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3526     rtmp[i] = 0.0;
3527     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3528     bjtmp = bj + bdiag[i+1] + 1;
3529     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3530 
3531     /* load in initial unfactored row of A */
3532     /* printf("row %d\n",i); */
3533     nz    = ai[r[i]+1] - ai[r[i]];
3534     ajtmp = aj + ai[r[i]];
3535     v     = aa + ai[r[i]];
3536     for (j=0; j<nz; j++) {
3537       rtmp[ics[*ajtmp++]] = v[j];
3538       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3539     }
3540     /* printf("\n"); */
3541 
3542     /* numerical factorization */
3543     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3544     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3545     k = 0;
3546     while (k < nzl){
3547       row   = *bjtmp++;
3548       /* printf("  prow %d\n",row); */
3549       pc         = rtmp + row;
3550       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3551       multiplier = (*pc) * (*pv);
3552       *pc        = multiplier;
3553       if (PetscAbsScalar(multiplier) > dt){
3554         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3555         pv         = b->a + bdiag[row+1] + 1;
3556         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3557         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3558         /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */
3559       }
3560       k++;
3561     }
3562 
3563     /* finished row so stick it into b->a */
3564     /* L-part */
3565     pv = b->a + bi[i] ;
3566     pj = bj + bi[i] ;
3567     nzl = bi[i+1] - bi[i];
3568     for (j=0; j<nzl; j++) {
3569       pv[j] = rtmp[pj[j]];
3570       /* printf(" (%d,%g),",pj[j],pv[j]); */
3571     }
3572 
3573     /* diagonal: invert diagonal entries for simplier triangular solves */
3574     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3575     b->a[bdiag[i]] = 1.0/rtmp[i];
3576     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3577 
3578     /* U-part */
3579     pv = b->a + bdiag[i+1] + 1;
3580     pj = bj + bdiag[i+1] + 1;
3581     nzu = bdiag[i] - bdiag[i+1] - 1;
3582     for (j=0; j<nzu; j++) {
3583       pv[j] = rtmp[pj[j]];
3584       /* printf(" (%d,%g),",pj[j],pv[j]); */
3585     }
3586     /* printf("\n"); */
3587   }
3588 
3589   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3590   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3591   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3592 
3593   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3594   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3595   if (row_identity && col_identity) {
3596     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3597   } else {
3598     C->ops->solve   = MatSolve_SeqAIJ;
3599   }
3600   C->ops->solveadd           = 0;
3601   C->ops->solvetranspose     = 0;
3602   C->ops->solvetransposeadd  = 0;
3603   C->ops->matsolve           = 0;
3604   C->assembled    = PETSC_TRUE;
3605   C->preallocated = PETSC_TRUE;
3606   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3607   PetscFunctionReturn(0);
3608 }
3609