xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
32       else              past  = PetscAbsScalar(aa[j]);
33     }
34     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
35     if (past/future > best) {
36       best    = past/future;
37       current = i;
38     }
39   }
40 
41   ierr     = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
42   ierr     = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
43   ierr     = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
44   order[0] = current;
45   for (i=0; i<n-1; i++) {
46     done[current] = PETSC_TRUE;
47     best          = -1;
48     /* loop over all neighbors of current pivot */
49     for (j=ai[current]; j<ai[current+1]; j++) {
50       jj = aj[j];
51       if (done[jj]) continue;
52       /* loop over columns of potential next row computing weights for below and above diagonal */
53       past = future = 0.0;
54       for (k=ai[jj]; k<ai[jj+1]; k++) {
55         kk = aj[k];
56         if (done[kk]) past += PetscAbsScalar(aa[k]);
57         else if (kk != jj) future += PetscAbsScalar(aa[k]);
58       }
59       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
60       if (past/future > best) {
61         best       = past/future;
62         newcurrent = jj;
63       }
64     }
65     if (best == -1) { /* no neighbors to select from so select best of all that remain */
66       best = -1;
67       for (k=0; k<n; k++) {
68         if (done[k]) continue;
69         future = 0.0;
70         past   = 0.0;
71         for (j=ai[k]; j<ai[k+1]; j++) {
72           kk = aj[j];
73           if (done[kk])       past += PetscAbsScalar(aa[j]);
74           else if (kk != k) future += PetscAbsScalar(aa[j]);
75         }
76         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
77         if (past/future > best) {
78           best       = past/future;
79           newcurrent = k;
80         }
81       }
82     }
83     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
84     current    = newcurrent;
85     order[i+1] = current;
86   }
87   ierr  = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
88   *icol = *irow;
89   ierr  = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
90   ierr  = PetscFree(done);CHKERRQ(ierr);
91   ierr  = PetscFree(order);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 EXTERN_C_END
95 
96 EXTERN_C_BEGIN
97 #undef __FUNCT__
98 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
99 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
100 {
101   PetscFunctionBegin;
102   *flg = PETSC_TRUE;
103   PetscFunctionReturn(0);
104 }
105 EXTERN_C_END
106 
107 EXTERN_C_BEGIN
108 #undef __FUNCT__
109 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
110 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
111 {
112   PetscInt       n = A->rmap->n;
113   PetscErrorCode ierr;
114 
115   PetscFunctionBegin;
116 #if defined(PETSC_USE_COMPLEX)
117   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
118 #endif
119   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
120   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
121   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
122     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
123 
124     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
125     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
126 
127     ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
128   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
129     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
130     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
131 
132     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
133     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
134   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
135   (*B)->factortype = ftype;
136   PetscFunctionReturn(0);
137 }
138 EXTERN_C_END
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
142 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
143 {
144   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
145   IS                 isicol;
146   PetscErrorCode     ierr;
147   const PetscInt     *r,*ic;
148   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
149   PetscInt           *bi,*bj,*ajtmp;
150   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
151   PetscReal          f;
152   PetscInt           nlnk,*lnk,k,**bi_ptr;
153   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
154   PetscBT            lnkbt;
155 
156   PetscFunctionBegin;
157   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
158   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
159   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
160   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
161 
162   /* get new row pointers */
163   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
164   bi[0] = 0;
165 
166   /* bdiag is location of diagonal in factor */
167   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
168   bdiag[0] = 0;
169 
170   /* linked list for storing column indices of the active row */
171   nlnk = n + 1;
172   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
173 
174   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
175 
176   /* initial FreeSpace size is f*(ai[n]+1) */
177   f             = info->fill;
178   ierr          = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
179   current_space = free_space;
180 
181   for (i=0; i<n; i++) {
182     /* copy previous fill into linked list */
183     nzi = 0;
184     nnz = ai[r[i]+1] - ai[r[i]];
185     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
186     ajtmp = aj + ai[r[i]];
187     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
188     nzi  += nlnk;
189 
190     /* add pivot rows into linked list */
191     row = lnk[n];
192     while (row < i) {
193       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
194       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
195       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
196       nzi  += nlnk;
197       row   = lnk[row];
198     }
199     bi[i+1] = bi[i] + nzi;
200     im[i]   = nzi;
201 
202     /* mark bdiag */
203     nzbd = 0;
204     nnz  = nzi;
205     k    = lnk[n];
206     while (nnz-- && k < i) {
207       nzbd++;
208       k = lnk[k];
209     }
210     bdiag[i] = bi[i] + nzbd;
211 
212     /* if free space is not available, make more free space */
213     if (current_space->local_remaining<nzi) {
214       nnz  = (n - i)*nzi; /* estimated and max additional space needed */
215       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
216       reallocs++;
217     }
218 
219     /* copy data into free space, then initialize lnk */
220     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
221 
222     bi_ptr[i]                       = current_space->array;
223     current_space->array           += nzi;
224     current_space->local_used      += nzi;
225     current_space->local_remaining -= nzi;
226   }
227 #if defined(PETSC_USE_INFO)
228   if (ai[n] != 0) {
229     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
230     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
231     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
232     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
233     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
234   } else {
235     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
236   }
237 #endif
238 
239   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
240   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
241 
242   /* destroy list of free space and other temporary array(s) */
243   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
244   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
245   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
246   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
247 
248   /* put together the new matrix */
249   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
250   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
251   b    = (Mat_SeqAIJ*)(B)->data;
252 
253   b->free_a       = PETSC_TRUE;
254   b->free_ij      = PETSC_TRUE;
255   b->singlemalloc = PETSC_FALSE;
256 
257   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
258   b->j          = bj;
259   b->i          = bi;
260   b->diag       = bdiag;
261   b->ilen       = 0;
262   b->imax       = 0;
263   b->row        = isrow;
264   b->col        = iscol;
265   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
266   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
267   b->icol       = isicol;
268   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
269 
270   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271   ierr     = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
272   b->maxnz = b->nz = bi[n];
273 
274   (B)->factortype            = MAT_FACTOR_LU;
275   (B)->info.factor_mallocs   = reallocs;
276   (B)->info.fill_ratio_given = f;
277 
278   if (ai[n]) {
279     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
280   } else {
281     (B)->info.fill_ratio_needed = 0.0;
282   }
283   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
284   if (a->inode.size) {
285     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
286   }
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
292 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
293 {
294   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
295   IS                 isicol;
296   PetscErrorCode     ierr;
297   const PetscInt     *r,*ic;
298   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
299   PetscInt           *bi,*bj,*ajtmp;
300   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
301   PetscReal          f;
302   PetscInt           nlnk,*lnk,k,**bi_ptr;
303   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
304   PetscBT            lnkbt;
305 
306   PetscFunctionBegin;
307   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
308   /*
309   PetscBool          olddatastruct=PETSC_FALSE;
310   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
311   if (olddatastruct) {
312     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
313     PetscFunctionReturn(0);
314   }
315   */
316   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
317   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
318   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
319   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
320 
321   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
322   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
323   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
324   bi[0] = bdiag[0] = 0;
325 
326   /* linked list for storing column indices of the active row */
327   nlnk = n + 1;
328   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
329 
330   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
331 
332   /* initial FreeSpace size is f*(ai[n]+1) */
333   f             = info->fill;
334   ierr          = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
335   current_space = free_space;
336 
337   for (i=0; i<n; i++) {
338     /* copy previous fill into linked list */
339     nzi = 0;
340     nnz = ai[r[i]+1] - ai[r[i]];
341     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
342     ajtmp = aj + ai[r[i]];
343     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
344     nzi  += nlnk;
345 
346     /* add pivot rows into linked list */
347     row = lnk[n];
348     while (row < i) {
349       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
350       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
351       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
352       nzi  += nlnk;
353       row   = lnk[row];
354     }
355     bi[i+1] = bi[i] + nzi;
356     im[i]   = nzi;
357 
358     /* mark bdiag */
359     nzbd = 0;
360     nnz  = nzi;
361     k    = lnk[n];
362     while (nnz-- && k < i) {
363       nzbd++;
364       k = lnk[k];
365     }
366     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
367 
368     /* if free space is not available, make more free space */
369     if (current_space->local_remaining<nzi) {
370       nnz  = 2*(n - i)*nzi; /* estimated and max additional space needed */
371       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
372       reallocs++;
373     }
374 
375     /* copy data into free space, then initialize lnk */
376     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
377 
378     bi_ptr[i]                       = current_space->array;
379     current_space->array           += nzi;
380     current_space->local_used      += nzi;
381     current_space->local_remaining -= nzi;
382   }
383 
384   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
385   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
386 
387   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
388   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
389   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
390   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
391   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
392 
393   /* put together the new matrix */
394   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
395   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
396   b    = (Mat_SeqAIJ*)(B)->data;
397 
398   b->free_a       = PETSC_TRUE;
399   b->free_ij      = PETSC_TRUE;
400   b->singlemalloc = PETSC_FALSE;
401 
402   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
403 
404   b->j          = bj;
405   b->i          = bi;
406   b->diag       = bdiag;
407   b->ilen       = 0;
408   b->imax       = 0;
409   b->row        = isrow;
410   b->col        = iscol;
411   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
412   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
413   b->icol       = isicol;
414   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
415 
416   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
417   ierr     = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
418   b->maxnz = b->nz = bdiag[0]+1;
419 
420   B->factortype            = MAT_FACTOR_LU;
421   B->info.factor_mallocs   = reallocs;
422   B->info.fill_ratio_given = f;
423 
424   if (ai[n]) {
425     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
426   } else {
427     B->info.fill_ratio_needed = 0.0;
428   }
429 #if defined(PETSC_USE_INFO)
430   if (ai[n] != 0) {
431     PetscReal af = B->info.fill_ratio_needed;
432     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
433     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
434     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
435     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
436   } else {
437     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
438   }
439 #endif
440   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
441   if (a->inode.size) {
442     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
443   }
444   PetscFunctionReturn(0);
445 }
446 
447 /*
448     Trouble in factorization, should we dump the original matrix?
449 */
450 #undef __FUNCT__
451 #define __FUNCT__ "MatFactorDumpMatrix"
452 PetscErrorCode MatFactorDumpMatrix(Mat A)
453 {
454   PetscErrorCode ierr;
455   PetscBool      flg = PETSC_FALSE;
456 
457   PetscFunctionBegin;
458   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
459   if (flg) {
460     PetscViewer viewer;
461     char        filename[PETSC_MAX_PATH_LEN];
462 
463     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
464     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
465     ierr = MatView(A,viewer);CHKERRQ(ierr);
466     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
467   }
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
473 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
474 {
475   Mat             C     =B;
476   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
477   IS              isrow = b->row,isicol = b->icol;
478   PetscErrorCode  ierr;
479   const PetscInt  *r,*ic,*ics;
480   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
481   PetscInt        i,j,k,nz,nzL,row,*pj;
482   const PetscInt  *ajtmp,*bjtmp;
483   MatScalar       *rtmp,*pc,multiplier,*pv;
484   const MatScalar *aa=a->a,*v;
485   PetscBool       row_identity,col_identity;
486   FactorShiftCtx  sctx;
487   const PetscInt  *ddiag;
488   PetscReal       rs;
489   MatScalar       d;
490 
491   PetscFunctionBegin;
492   /* MatPivotSetUp(): initialize shift context sctx */
493   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
494 
495   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
496     ddiag          = a->diag;
497     sctx.shift_top = info->zeropivot;
498     for (i=0; i<n; i++) {
499       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
500       d  = (aa)[ddiag[i]];
501       rs = -PetscAbsScalar(d) - PetscRealPart(d);
502       v  = aa+ai[i];
503       nz = ai[i+1] - ai[i];
504       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
505       if (rs>sctx.shift_top) sctx.shift_top = rs;
506     }
507     sctx.shift_top *= 1.1;
508     sctx.nshift_max = 5;
509     sctx.shift_lo   = 0.;
510     sctx.shift_hi   = 1.;
511   }
512 
513   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
514   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
515   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
516   ics  = ic;
517 
518   do {
519     sctx.newshift = PETSC_FALSE;
520     for (i=0; i<n; i++) {
521       /* zero rtmp */
522       /* L part */
523       nz    = bi[i+1] - bi[i];
524       bjtmp = bj + bi[i];
525       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
526 
527       /* U part */
528       nz    = bdiag[i]-bdiag[i+1];
529       bjtmp = bj + bdiag[i+1]+1;
530       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
531 
532       /* load in initial (unfactored row) */
533       nz    = ai[r[i]+1] - ai[r[i]];
534       ajtmp = aj + ai[r[i]];
535       v     = aa + ai[r[i]];
536       for (j=0; j<nz; j++) {
537         rtmp[ics[ajtmp[j]]] = v[j];
538       }
539       /* ZeropivotApply() */
540       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
541 
542       /* elimination */
543       bjtmp = bj + bi[i];
544       row   = *bjtmp++;
545       nzL   = bi[i+1] - bi[i];
546       for (k=0; k < nzL; k++) {
547         pc = rtmp + row;
548         if (*pc != 0.0) {
549           pv         = b->a + bdiag[row];
550           multiplier = *pc * (*pv);
551           *pc        = multiplier;
552 
553           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
554           pv = b->a + bdiag[row+1]+1;
555           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
556 
557           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
558           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
559         }
560         row = *bjtmp++;
561       }
562 
563       /* finished row so stick it into b->a */
564       rs = 0.0;
565       /* L part */
566       pv = b->a + bi[i];
567       pj = b->j + bi[i];
568       nz = bi[i+1] - bi[i];
569       for (j=0; j<nz; j++) {
570         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
571       }
572 
573       /* U part */
574       pv = b->a + bdiag[i+1]+1;
575       pj = b->j + bdiag[i+1]+1;
576       nz = bdiag[i] - bdiag[i+1]-1;
577       for (j=0; j<nz; j++) {
578         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
579       }
580 
581       sctx.rs = rs;
582       sctx.pv = rtmp[i];
583       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
584       if (sctx.newshift) break; /* break for-loop */
585       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
586 
587       /* Mark diagonal and invert diagonal for simplier triangular solves */
588       pv  = b->a + bdiag[i];
589       *pv = 1.0/rtmp[i];
590 
591     } /* endof for (i=0; i<n; i++) { */
592 
593     /* MatPivotRefine() */
594     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
595       /*
596        * if no shift in this attempt & shifting & started shifting & can refine,
597        * then try lower shift
598        */
599       sctx.shift_hi       = sctx.shift_fraction;
600       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
601       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
602       sctx.newshift       = PETSC_TRUE;
603       sctx.nshift++;
604     }
605   } while (sctx.newshift);
606 
607   ierr = PetscFree(rtmp);CHKERRQ(ierr);
608   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
609   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
610 
611   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
612   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
613   if (row_identity && col_identity) {
614     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
615   } else {
616     C->ops->solve = MatSolve_SeqAIJ;
617   }
618   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
619   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
620   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
621   C->ops->matsolve          = MatMatSolve_SeqAIJ;
622   C->assembled              = PETSC_TRUE;
623   C->preallocated           = PETSC_TRUE;
624 
625   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
626 
627   /* MatShiftView(A,info,&sctx) */
628   if (sctx.nshift) {
629     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
630       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
631     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
632       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
633     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
634       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
635     }
636   }
637   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
638   PetscFunctionReturn(0);
639 }
640 
641 #undef __FUNCT__
642 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
643 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
644 {
645   Mat             C     =B;
646   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
647   IS              isrow = b->row,isicol = b->icol;
648   PetscErrorCode  ierr;
649   const PetscInt  *r,*ic,*ics;
650   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
651   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
652   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
653   MatScalar       *pv,*rtmp,*pc,multiplier,d;
654   const MatScalar *v,*aa=a->a;
655   PetscReal       rs=0.0;
656   FactorShiftCtx  sctx;
657   const PetscInt  *ddiag;
658   PetscBool       row_identity, col_identity;
659 
660   PetscFunctionBegin;
661   /* MatPivotSetUp(): initialize shift context sctx */
662   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
663 
664   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
665     ddiag          = a->diag;
666     sctx.shift_top = info->zeropivot;
667     for (i=0; i<n; i++) {
668       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
669       d  = (aa)[ddiag[i]];
670       rs = -PetscAbsScalar(d) - PetscRealPart(d);
671       v  = aa+ai[i];
672       nz = ai[i+1] - ai[i];
673       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
674       if (rs>sctx.shift_top) sctx.shift_top = rs;
675     }
676     sctx.shift_top *= 1.1;
677     sctx.nshift_max = 5;
678     sctx.shift_lo   = 0.;
679     sctx.shift_hi   = 1.;
680   }
681 
682   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
683   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
684   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
685   ics  = ic;
686 
687   do {
688     sctx.newshift = PETSC_FALSE;
689     for (i=0; i<n; i++) {
690       nz    = bi[i+1] - bi[i];
691       bjtmp = bj + bi[i];
692       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
693 
694       /* load in initial (unfactored row) */
695       nz    = ai[r[i]+1] - ai[r[i]];
696       ajtmp = aj + ai[r[i]];
697       v     = aa + ai[r[i]];
698       for (j=0; j<nz; j++) {
699         rtmp[ics[ajtmp[j]]] = v[j];
700       }
701       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
702 
703       row = *bjtmp++;
704       while  (row < i) {
705         pc = rtmp + row;
706         if (*pc != 0.0) {
707           pv         = b->a + diag_offset[row];
708           pj         = b->j + diag_offset[row] + 1;
709           multiplier = *pc / *pv++;
710           *pc        = multiplier;
711           nz         = bi[row+1] - diag_offset[row] - 1;
712           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
713           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
714         }
715         row = *bjtmp++;
716       }
717       /* finished row so stick it into b->a */
718       pv   = b->a + bi[i];
719       pj   = b->j + bi[i];
720       nz   = bi[i+1] - bi[i];
721       diag = diag_offset[i] - bi[i];
722       rs   = 0.0;
723       for (j=0; j<nz; j++) {
724         pv[j] = rtmp[pj[j]];
725         rs   += PetscAbsScalar(pv[j]);
726       }
727       rs -= PetscAbsScalar(pv[diag]);
728 
729       sctx.rs = rs;
730       sctx.pv = pv[diag];
731       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
732       if (sctx.newshift) break;
733       pv[diag] = sctx.pv;
734     }
735 
736     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
737       /*
738        * if no shift in this attempt & shifting & started shifting & can refine,
739        * then try lower shift
740        */
741       sctx.shift_hi       = sctx.shift_fraction;
742       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
743       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
744       sctx.newshift       = PETSC_TRUE;
745       sctx.nshift++;
746     }
747   } while (sctx.newshift);
748 
749   /* invert diagonal entries for simplier triangular solves */
750   for (i=0; i<n; i++) {
751     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
752   }
753   ierr = PetscFree(rtmp);CHKERRQ(ierr);
754   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
755   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
756 
757   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
758   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
759   if (row_identity && col_identity) {
760     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
761   } else {
762     C->ops->solve = MatSolve_SeqAIJ_inplace;
763   }
764   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
765   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
766   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
767   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
768 
769   C->assembled    = PETSC_TRUE;
770   C->preallocated = PETSC_TRUE;
771 
772   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
773   if (sctx.nshift) {
774     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
775       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
776     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
777       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
778     }
779   }
780   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
781   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
782 
783   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
784   PetscFunctionReturn(0);
785 }
786 
787 /*
788    This routine implements inplace ILU(0) with row or/and column permutations.
789    Input:
790      A - original matrix
791    Output;
792      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
793          a->j (col index) is permuted by the inverse of colperm, then sorted
794          a->a reordered accordingly with a->j
795          a->diag (ptr to diagonal elements) is updated.
796 */
797 #undef __FUNCT__
798 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
799 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
800 {
801   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
802   IS              isrow = a->row,isicol = a->icol;
803   PetscErrorCode  ierr;
804   const PetscInt  *r,*ic,*ics;
805   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
806   PetscInt        *ajtmp,nz,row;
807   PetscInt        *diag = a->diag,nbdiag,*pj;
808   PetscScalar     *rtmp,*pc,multiplier,d;
809   MatScalar       *pv,*v;
810   PetscReal       rs;
811   FactorShiftCtx  sctx;
812   const MatScalar *aa=a->a,*vtmp;
813 
814   PetscFunctionBegin;
815   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
816 
817   /* MatPivotSetUp(): initialize shift context sctx */
818   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
819 
820   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
821     const PetscInt *ddiag = a->diag;
822     sctx.shift_top = info->zeropivot;
823     for (i=0; i<n; i++) {
824       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
825       d    = (aa)[ddiag[i]];
826       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
827       vtmp = aa+ai[i];
828       nz   = ai[i+1] - ai[i];
829       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
830       if (rs>sctx.shift_top) sctx.shift_top = rs;
831     }
832     sctx.shift_top *= 1.1;
833     sctx.nshift_max = 5;
834     sctx.shift_lo   = 0.;
835     sctx.shift_hi   = 1.;
836   }
837 
838   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
839   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
840   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
841   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
842   ics  = ic;
843 
844 #if defined(MV)
845   sctx.shift_top      = 0.;
846   sctx.nshift_max     = 0;
847   sctx.shift_lo       = 0.;
848   sctx.shift_hi       = 0.;
849   sctx.shift_fraction = 0.;
850 
851   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
852     sctx.shift_top = 0.;
853     for (i=0; i<n; i++) {
854       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
855       d  = (a->a)[diag[i]];
856       rs = -PetscAbsScalar(d) - PetscRealPart(d);
857       v  = a->a+ai[i];
858       nz = ai[i+1] - ai[i];
859       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
860       if (rs>sctx.shift_top) sctx.shift_top = rs;
861     }
862     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
863     sctx.shift_top *= 1.1;
864     sctx.nshift_max = 5;
865     sctx.shift_lo   = 0.;
866     sctx.shift_hi   = 1.;
867   }
868 
869   sctx.shift_amount = 0.;
870   sctx.nshift       = 0;
871 #endif
872 
873   do {
874     sctx.newshift = PETSC_FALSE;
875     for (i=0; i<n; i++) {
876       /* load in initial unfactored row */
877       nz    = ai[r[i]+1] - ai[r[i]];
878       ajtmp = aj + ai[r[i]];
879       v     = a->a + ai[r[i]];
880       /* sort permuted ajtmp and values v accordingly */
881       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
882       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
883 
884       diag[r[i]] = ai[r[i]];
885       for (j=0; j<nz; j++) {
886         rtmp[ajtmp[j]] = v[j];
887         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
888       }
889       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
890 
891       row = *ajtmp++;
892       while  (row < i) {
893         pc = rtmp + row;
894         if (*pc != 0.0) {
895           pv = a->a + diag[r[row]];
896           pj = aj + diag[r[row]] + 1;
897 
898           multiplier = *pc / *pv++;
899           *pc        = multiplier;
900           nz         = ai[r[row]+1] - diag[r[row]] - 1;
901           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
902           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
903         }
904         row = *ajtmp++;
905       }
906       /* finished row so overwrite it onto a->a */
907       pv     = a->a + ai[r[i]];
908       pj     = aj + ai[r[i]];
909       nz     = ai[r[i]+1] - ai[r[i]];
910       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
911 
912       rs = 0.0;
913       for (j=0; j<nz; j++) {
914         pv[j] = rtmp[pj[j]];
915         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
916       }
917 
918       sctx.rs = rs;
919       sctx.pv = pv[nbdiag];
920       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
921       if (sctx.newshift) break;
922       pv[nbdiag] = sctx.pv;
923     }
924 
925     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
926       /*
927        * if no shift in this attempt & shifting & started shifting & can refine,
928        * then try lower shift
929        */
930       sctx.shift_hi       = sctx.shift_fraction;
931       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
932       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
933       sctx.newshift       = PETSC_TRUE;
934       sctx.nshift++;
935     }
936   } while (sctx.newshift);
937 
938   /* invert diagonal entries for simplier triangular solves */
939   for (i=0; i<n; i++) {
940     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
941   }
942 
943   ierr = PetscFree(rtmp);CHKERRQ(ierr);
944   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
945   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
946 
947   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
948   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
949   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
950   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
951 
952   A->assembled    = PETSC_TRUE;
953   A->preallocated = PETSC_TRUE;
954 
955   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
956   if (sctx.nshift) {
957     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
958       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
959     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
960       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
961     }
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 /* ----------------------------------------------------------- */
967 #undef __FUNCT__
968 #define __FUNCT__ "MatLUFactor_SeqAIJ"
969 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
970 {
971   PetscErrorCode ierr;
972   Mat            C;
973 
974   PetscFunctionBegin;
975   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
976   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
977   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
978 
979   A->ops->solve          = C->ops->solve;
980   A->ops->solvetranspose = C->ops->solvetranspose;
981 
982   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
983   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 /* ----------------------------------------------------------- */
987 
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
991 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
992 {
993   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
994   IS                iscol = a->col,isrow = a->row;
995   PetscErrorCode    ierr;
996   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
997   PetscInt          nz;
998   const PetscInt    *rout,*cout,*r,*c;
999   PetscScalar       *x,*tmp,*tmps,sum;
1000   const PetscScalar *b;
1001   const MatScalar   *aa = a->a,*v;
1002 
1003   PetscFunctionBegin;
1004   if (!n) PetscFunctionReturn(0);
1005 
1006   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1007   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1008   tmp  = a->solve_work;
1009 
1010   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1011   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1012 
1013   /* forward solve the lower triangular */
1014   tmp[0] = b[*r++];
1015   tmps   = tmp;
1016   for (i=1; i<n; i++) {
1017     v   = aa + ai[i];
1018     vi  = aj + ai[i];
1019     nz  = a->diag[i] - ai[i];
1020     sum = b[*r++];
1021     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1022     tmp[i] = sum;
1023   }
1024 
1025   /* backward solve the upper triangular */
1026   for (i=n-1; i>=0; i--) {
1027     v   = aa + a->diag[i] + 1;
1028     vi  = aj + a->diag[i] + 1;
1029     nz  = ai[i+1] - a->diag[i] - 1;
1030     sum = tmp[i];
1031     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1032     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1033   }
1034 
1035   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1036   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1037   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1038   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1039   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1045 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1046 {
1047   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1048   IS              iscol = a->col,isrow = a->row;
1049   PetscErrorCode  ierr;
1050   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1051   PetscInt        nz,neq;
1052   const PetscInt  *rout,*cout,*r,*c;
1053   PetscScalar     *x,*b,*tmp,*tmps,sum;
1054   const MatScalar *aa = a->a,*v;
1055   PetscBool       bisdense,xisdense;
1056 
1057   PetscFunctionBegin;
1058   if (!n) PetscFunctionReturn(0);
1059 
1060   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1061   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1062   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1063   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1064 
1065   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1066   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1067 
1068   tmp  = a->solve_work;
1069   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1070   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1071 
1072   for (neq=0; neq<B->cmap->n; neq++) {
1073     /* forward solve the lower triangular */
1074     tmp[0] = b[r[0]];
1075     tmps   = tmp;
1076     for (i=1; i<n; i++) {
1077       v   = aa + ai[i];
1078       vi  = aj + ai[i];
1079       nz  = a->diag[i] - ai[i];
1080       sum = b[r[i]];
1081       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1082       tmp[i] = sum;
1083     }
1084     /* backward solve the upper triangular */
1085     for (i=n-1; i>=0; i--) {
1086       v   = aa + a->diag[i] + 1;
1087       vi  = aj + a->diag[i] + 1;
1088       nz  = ai[i+1] - a->diag[i] - 1;
1089       sum = tmp[i];
1090       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1091       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1092     }
1093 
1094     b += n;
1095     x += n;
1096   }
1097   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1098   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1099   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1100   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1101   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1107 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1108 {
1109   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1110   IS              iscol = a->col,isrow = a->row;
1111   PetscErrorCode  ierr;
1112   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1113   PetscInt        nz,neq;
1114   const PetscInt  *rout,*cout,*r,*c;
1115   PetscScalar     *x,*b,*tmp,sum;
1116   const MatScalar *aa = a->a,*v;
1117   PetscBool       bisdense,xisdense;
1118 
1119   PetscFunctionBegin;
1120   if (!n) PetscFunctionReturn(0);
1121 
1122   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1123   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1124   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1125   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1126 
1127   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1128   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1129 
1130   tmp  = a->solve_work;
1131   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1132   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1133 
1134   for (neq=0; neq<B->cmap->n; neq++) {
1135     /* forward solve the lower triangular */
1136     tmp[0] = b[r[0]];
1137     v      = aa;
1138     vi     = aj;
1139     for (i=1; i<n; i++) {
1140       nz  = ai[i+1] - ai[i];
1141       sum = b[r[i]];
1142       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1143       tmp[i] = sum;
1144       v     += nz; vi += nz;
1145     }
1146 
1147     /* backward solve the upper triangular */
1148     for (i=n-1; i>=0; i--) {
1149       v   = aa + adiag[i+1]+1;
1150       vi  = aj + adiag[i+1]+1;
1151       nz  = adiag[i]-adiag[i+1]-1;
1152       sum = tmp[i];
1153       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1154       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1155     }
1156 
1157     b += n;
1158     x += n;
1159   }
1160   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1161   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1162   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1163   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1164   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1170 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1171 {
1172   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1173   IS              iscol = a->col,isrow = a->row;
1174   PetscErrorCode  ierr;
1175   const PetscInt  *r,*c,*rout,*cout;
1176   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1177   PetscInt        nz,row;
1178   PetscScalar     *x,*b,*tmp,*tmps,sum;
1179   const MatScalar *aa = a->a,*v;
1180 
1181   PetscFunctionBegin;
1182   if (!n) PetscFunctionReturn(0);
1183 
1184   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1185   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1186   tmp  = a->solve_work;
1187 
1188   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1189   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1190 
1191   /* forward solve the lower triangular */
1192   tmp[0] = b[*r++];
1193   tmps   = tmp;
1194   for (row=1; row<n; row++) {
1195     i   = rout[row]; /* permuted row */
1196     v   = aa + ai[i];
1197     vi  = aj + ai[i];
1198     nz  = a->diag[i] - ai[i];
1199     sum = b[*r++];
1200     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1201     tmp[row] = sum;
1202   }
1203 
1204   /* backward solve the upper triangular */
1205   for (row=n-1; row>=0; row--) {
1206     i   = rout[row]; /* permuted row */
1207     v   = aa + a->diag[i] + 1;
1208     vi  = aj + a->diag[i] + 1;
1209     nz  = ai[i+1] - a->diag[i] - 1;
1210     sum = tmp[row];
1211     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1212     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1213   }
1214 
1215   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1216   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1217   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1218   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1219   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /* ----------------------------------------------------------- */
1224 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1227 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1228 {
1229   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1230   PetscErrorCode    ierr;
1231   PetscInt          n   = A->rmap->n;
1232   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1233   PetscScalar       *x;
1234   const PetscScalar *b;
1235   const MatScalar   *aa = a->a;
1236 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1237   PetscInt        adiag_i,i,nz,ai_i;
1238   const PetscInt  *vi;
1239   const MatScalar *v;
1240   PetscScalar     sum;
1241 #endif
1242 
1243   PetscFunctionBegin;
1244   if (!n) PetscFunctionReturn(0);
1245 
1246   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1247   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1248 
1249 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1250   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1251 #else
1252   /* forward solve the lower triangular */
1253   x[0] = b[0];
1254   for (i=1; i<n; i++) {
1255     ai_i = ai[i];
1256     v    = aa + ai_i;
1257     vi   = aj + ai_i;
1258     nz   = adiag[i] - ai_i;
1259     sum  = b[i];
1260     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1261     x[i] = sum;
1262   }
1263 
1264   /* backward solve the upper triangular */
1265   for (i=n-1; i>=0; i--) {
1266     adiag_i = adiag[i];
1267     v       = aa + adiag_i + 1;
1268     vi      = aj + adiag_i + 1;
1269     nz      = ai[i+1] - adiag_i - 1;
1270     sum     = x[i];
1271     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1272     x[i] = sum*aa[adiag_i];
1273   }
1274 #endif
1275   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1276   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1277   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1283 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1284 {
1285   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1286   IS                iscol = a->col,isrow = a->row;
1287   PetscErrorCode    ierr;
1288   PetscInt          i, n = A->rmap->n,j;
1289   PetscInt          nz;
1290   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1291   PetscScalar       *x,*tmp,sum;
1292   const PetscScalar *b;
1293   const MatScalar   *aa = a->a,*v;
1294 
1295   PetscFunctionBegin;
1296   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1297 
1298   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1299   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1300   tmp  = a->solve_work;
1301 
1302   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1303   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1304 
1305   /* forward solve the lower triangular */
1306   tmp[0] = b[*r++];
1307   for (i=1; i<n; i++) {
1308     v   = aa + ai[i];
1309     vi  = aj + ai[i];
1310     nz  = a->diag[i] - ai[i];
1311     sum = b[*r++];
1312     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1313     tmp[i] = sum;
1314   }
1315 
1316   /* backward solve the upper triangular */
1317   for (i=n-1; i>=0; i--) {
1318     v   = aa + a->diag[i] + 1;
1319     vi  = aj + a->diag[i] + 1;
1320     nz  = ai[i+1] - a->diag[i] - 1;
1321     sum = tmp[i];
1322     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1323     tmp[i]   = sum*aa[a->diag[i]];
1324     x[*c--] += tmp[i];
1325   }
1326 
1327   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1328   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1329   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1330   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1331   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1337 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1338 {
1339   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1340   IS                iscol = a->col,isrow = a->row;
1341   PetscErrorCode    ierr;
1342   PetscInt          i, n = A->rmap->n,j;
1343   PetscInt          nz;
1344   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1345   PetscScalar       *x,*tmp,sum;
1346   const PetscScalar *b;
1347   const MatScalar   *aa = a->a,*v;
1348 
1349   PetscFunctionBegin;
1350   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1351 
1352   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1353   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1354   tmp  = a->solve_work;
1355 
1356   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1357   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1358 
1359   /* forward solve the lower triangular */
1360   tmp[0] = b[r[0]];
1361   v      = aa;
1362   vi     = aj;
1363   for (i=1; i<n; i++) {
1364     nz  = ai[i+1] - ai[i];
1365     sum = b[r[i]];
1366     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1367     tmp[i] = sum;
1368     v     += nz;
1369     vi    += nz;
1370   }
1371 
1372   /* backward solve the upper triangular */
1373   v  = aa + adiag[n-1];
1374   vi = aj + adiag[n-1];
1375   for (i=n-1; i>=0; i--) {
1376     nz  = adiag[i] - adiag[i+1] - 1;
1377     sum = tmp[i];
1378     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1379     tmp[i]   = sum*v[nz];
1380     x[c[i]] += tmp[i];
1381     v       += nz+1; vi += nz+1;
1382   }
1383 
1384   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1385   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1386   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1387   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1388   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1394 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1395 {
1396   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1397   IS                iscol = a->col,isrow = a->row;
1398   PetscErrorCode    ierr;
1399   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1400   PetscInt          i,n = A->rmap->n,j;
1401   PetscInt          nz;
1402   PetscScalar       *x,*tmp,s1;
1403   const MatScalar   *aa = a->a,*v;
1404   const PetscScalar *b;
1405 
1406   PetscFunctionBegin;
1407   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1408   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1409   tmp  = a->solve_work;
1410 
1411   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1412   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1413 
1414   /* copy the b into temp work space according to permutation */
1415   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1416 
1417   /* forward solve the U^T */
1418   for (i=0; i<n; i++) {
1419     v   = aa + diag[i];
1420     vi  = aj + diag[i] + 1;
1421     nz  = ai[i+1] - diag[i] - 1;
1422     s1  = tmp[i];
1423     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1424     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1425     tmp[i] = s1;
1426   }
1427 
1428   /* backward solve the L^T */
1429   for (i=n-1; i>=0; i--) {
1430     v  = aa + diag[i] - 1;
1431     vi = aj + diag[i] - 1;
1432     nz = diag[i] - ai[i];
1433     s1 = tmp[i];
1434     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1435   }
1436 
1437   /* copy tmp into x according to permutation */
1438   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1439 
1440   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1441   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1442   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1443   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1444 
1445   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1451 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1452 {
1453   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1454   IS                iscol = a->col,isrow = a->row;
1455   PetscErrorCode    ierr;
1456   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1457   PetscInt          i,n = A->rmap->n,j;
1458   PetscInt          nz;
1459   PetscScalar       *x,*tmp,s1;
1460   const MatScalar   *aa = a->a,*v;
1461   const PetscScalar *b;
1462 
1463   PetscFunctionBegin;
1464   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1465   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1466   tmp  = a->solve_work;
1467 
1468   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1469   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1470 
1471   /* copy the b into temp work space according to permutation */
1472   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1473 
1474   /* forward solve the U^T */
1475   for (i=0; i<n; i++) {
1476     v   = aa + adiag[i+1] + 1;
1477     vi  = aj + adiag[i+1] + 1;
1478     nz  = adiag[i] - adiag[i+1] - 1;
1479     s1  = tmp[i];
1480     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1481     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1482     tmp[i] = s1;
1483   }
1484 
1485   /* backward solve the L^T */
1486   for (i=n-1; i>=0; i--) {
1487     v  = aa + ai[i];
1488     vi = aj + ai[i];
1489     nz = ai[i+1] - ai[i];
1490     s1 = tmp[i];
1491     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1492   }
1493 
1494   /* copy tmp into x according to permutation */
1495   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1496 
1497   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1498   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1499   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1500   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1501 
1502   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1508 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1509 {
1510   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1511   IS                iscol = a->col,isrow = a->row;
1512   PetscErrorCode    ierr;
1513   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1514   PetscInt          i,n = A->rmap->n,j;
1515   PetscInt          nz;
1516   PetscScalar       *x,*tmp,s1;
1517   const MatScalar   *aa = a->a,*v;
1518   const PetscScalar *b;
1519 
1520   PetscFunctionBegin;
1521   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1522   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1523   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1524   tmp  = a->solve_work;
1525 
1526   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1527   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1528 
1529   /* copy the b into temp work space according to permutation */
1530   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1531 
1532   /* forward solve the U^T */
1533   for (i=0; i<n; i++) {
1534     v   = aa + diag[i];
1535     vi  = aj + diag[i] + 1;
1536     nz  = ai[i+1] - diag[i] - 1;
1537     s1  = tmp[i];
1538     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1539     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1540     tmp[i] = s1;
1541   }
1542 
1543   /* backward solve the L^T */
1544   for (i=n-1; i>=0; i--) {
1545     v  = aa + diag[i] - 1;
1546     vi = aj + diag[i] - 1;
1547     nz = diag[i] - ai[i];
1548     s1 = tmp[i];
1549     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1550   }
1551 
1552   /* copy tmp into x according to permutation */
1553   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1554 
1555   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1556   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1557   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1558   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1559 
1560   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1566 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1567 {
1568   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1569   IS                iscol = a->col,isrow = a->row;
1570   PetscErrorCode    ierr;
1571   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1572   PetscInt          i,n = A->rmap->n,j;
1573   PetscInt          nz;
1574   PetscScalar       *x,*tmp,s1;
1575   const MatScalar   *aa = a->a,*v;
1576   const PetscScalar *b;
1577 
1578   PetscFunctionBegin;
1579   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1580   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1581   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1582   tmp  = a->solve_work;
1583 
1584   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1585   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1586 
1587   /* copy the b into temp work space according to permutation */
1588   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1589 
1590   /* forward solve the U^T */
1591   for (i=0; i<n; i++) {
1592     v   = aa + adiag[i+1] + 1;
1593     vi  = aj + adiag[i+1] + 1;
1594     nz  = adiag[i] - adiag[i+1] - 1;
1595     s1  = tmp[i];
1596     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1597     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1598     tmp[i] = s1;
1599   }
1600 
1601 
1602   /* backward solve the L^T */
1603   for (i=n-1; i>=0; i--) {
1604     v  = aa + ai[i];
1605     vi = aj + ai[i];
1606     nz = ai[i+1] - ai[i];
1607     s1 = tmp[i];
1608     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1609   }
1610 
1611   /* copy tmp into x according to permutation */
1612   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1613 
1614   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1615   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1616   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1617   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1618 
1619   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 /* ----------------------------------------------------------------*/
1624 
1625 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1626 
1627 /*
1628    ilu() under revised new data structure.
1629    Factored arrays bj and ba are stored as
1630      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1631 
1632    bi=fact->i is an array of size n+1, in which
1633    bi+
1634      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1635      bi[n]:  points to L(n-1,n-1)+1
1636 
1637   bdiag=fact->diag is an array of size n+1,in which
1638      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1639      bdiag[n]: points to entry of U(n-1,0)-1
1640 
1641    U(i,:) contains bdiag[i] as its last entry, i.e.,
1642     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1643 */
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1646 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1647 {
1648 
1649   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1650   PetscErrorCode ierr;
1651   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1652   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1653   PetscBool      missing;
1654   IS             isicol;
1655 
1656   PetscFunctionBegin;
1657   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1658   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1659   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1660   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1661   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1662   b    = (Mat_SeqAIJ*)(fact)->data;
1663 
1664   /* allocate matrix arrays for new data structure */
1665   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1666   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1667 
1668   b->singlemalloc = PETSC_TRUE;
1669   if (!b->diag) {
1670     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1671     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1672   }
1673   bdiag = b->diag;
1674 
1675   if (n > 0) {
1676     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1677   }
1678 
1679   /* set bi and bj with new data structure */
1680   bi = b->i;
1681   bj = b->j;
1682 
1683   /* L part */
1684   bi[0] = 0;
1685   for (i=0; i<n; i++) {
1686     nz      = adiag[i] - ai[i];
1687     bi[i+1] = bi[i] + nz;
1688     aj      = a->j + ai[i];
1689     for (j=0; j<nz; j++) {
1690       /*   *bj = aj[j]; bj++; */
1691       bj[k++] = aj[j];
1692     }
1693   }
1694 
1695   /* U part */
1696   bdiag[n] = bi[n]-1;
1697   for (i=n-1; i>=0; i--) {
1698     nz = ai[i+1] - adiag[i] - 1;
1699     aj = a->j + adiag[i] + 1;
1700     for (j=0; j<nz; j++) {
1701       /*      *bj = aj[j]; bj++; */
1702       bj[k++] = aj[j];
1703     }
1704     /* diag[i] */
1705     /*    *bj = i; bj++; */
1706     bj[k++]  = i;
1707     bdiag[i] = bdiag[i+1] + nz + 1;
1708   }
1709 
1710   fact->factortype             = MAT_FACTOR_ILU;
1711   fact->info.factor_mallocs    = 0;
1712   fact->info.fill_ratio_given  = info->fill;
1713   fact->info.fill_ratio_needed = 1.0;
1714   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1715 
1716   b       = (Mat_SeqAIJ*)(fact)->data;
1717   b->row  = isrow;
1718   b->col  = iscol;
1719   b->icol = isicol;
1720   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1721   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1722   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1728 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1729 {
1730   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1731   IS                 isicol;
1732   PetscErrorCode     ierr;
1733   const PetscInt     *r,*ic;
1734   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1735   PetscInt           *bi,*cols,nnz,*cols_lvl;
1736   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1737   PetscInt           i,levels,diagonal_fill;
1738   PetscBool          col_identity,row_identity;
1739   PetscReal          f;
1740   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1741   PetscBT            lnkbt;
1742   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1743   PetscFreeSpaceList free_space    =PETSC_NULL,current_space=PETSC_NULL;
1744   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1745 
1746   PetscFunctionBegin;
1747   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1748   /*
1749   PetscBool          olddatastruct=PETSC_FALSE;
1750   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1751   if (olddatastruct) {
1752     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1753     PetscFunctionReturn(0);
1754   }
1755   */
1756 
1757   levels = (PetscInt)info->levels;
1758   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1759   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1760   if (!levels && row_identity && col_identity) {
1761     /* special case: ilu(0) with natural ordering */
1762     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1763     if (a->inode.size) {
1764       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1765     }
1766     PetscFunctionReturn(0);
1767   }
1768 
1769   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1770   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1771   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1772   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1773 
1774   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1775   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1776   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1777   bi[0] = bdiag[0] = 0;
1778   ierr  = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1779 
1780   /* create a linked list for storing column indices of the active row */
1781   nlnk = n + 1;
1782   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1783 
1784   /* initial FreeSpace size is f*(ai[n]+1) */
1785   f                 = info->fill;
1786   diagonal_fill     = (PetscInt)info->diagonal_fill;
1787   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1788   current_space     = free_space;
1789   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1790   current_space_lvl = free_space_lvl;
1791   for (i=0; i<n; i++) {
1792     nzi = 0;
1793     /* copy current row into linked list */
1794     nnz = ai[r[i]+1] - ai[r[i]];
1795     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1796     cols   = aj + ai[r[i]];
1797     lnk[i] = -1; /* marker to indicate if diagonal exists */
1798     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1799     nzi   += nlnk;
1800 
1801     /* make sure diagonal entry is included */
1802     if (diagonal_fill && lnk[i] == -1) {
1803       fm = n;
1804       while (lnk[fm] < i) fm = lnk[fm];
1805       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1806       lnk[fm]    = i;
1807       lnk_lvl[i] = 0;
1808       nzi++; dcount++;
1809     }
1810 
1811     /* add pivot rows into the active row */
1812     nzbd = 0;
1813     prow = lnk[n];
1814     while (prow < i) {
1815       nnz      = bdiag[prow];
1816       cols     = bj_ptr[prow] + nnz + 1;
1817       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1818       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1819       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1820       nzi     += nlnk;
1821       prow     = lnk[prow];
1822       nzbd++;
1823     }
1824     bdiag[i] = nzbd;
1825     bi[i+1]  = bi[i] + nzi;
1826     /* if free space is not available, make more free space */
1827     if (current_space->local_remaining<nzi) {
1828       nnz  = 2*nzi*(n - i); /* estimated and max additional space needed */
1829       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1830       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1831       reallocs++;
1832     }
1833 
1834     /* copy data into free_space and free_space_lvl, then initialize lnk */
1835     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1836     bj_ptr[i]    = current_space->array;
1837     bjlvl_ptr[i] = current_space_lvl->array;
1838 
1839     /* make sure the active row i has diagonal entry */
1840     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1841 
1842     current_space->array               += nzi;
1843     current_space->local_used          += nzi;
1844     current_space->local_remaining     -= nzi;
1845     current_space_lvl->array           += nzi;
1846     current_space_lvl->local_used      += nzi;
1847     current_space_lvl->local_remaining -= nzi;
1848   }
1849 
1850   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1851   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1852   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1853   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1854   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1855 
1856   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1857   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1858   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1859 
1860 #if defined(PETSC_USE_INFO)
1861   {
1862     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1863     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1864     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1865     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1866     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1867     if (diagonal_fill) {
1868       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1869     }
1870   }
1871 #endif
1872   /* put together the new matrix */
1873   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1874   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1875   b    = (Mat_SeqAIJ*)(fact)->data;
1876 
1877   b->free_a       = PETSC_TRUE;
1878   b->free_ij      = PETSC_TRUE;
1879   b->singlemalloc = PETSC_FALSE;
1880 
1881   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1882 
1883   b->j    = bj;
1884   b->i    = bi;
1885   b->diag = bdiag;
1886   b->ilen = 0;
1887   b->imax = 0;
1888   b->row  = isrow;
1889   b->col  = iscol;
1890   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1891   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1892   b->icol = isicol;
1893 
1894   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1895   /* In b structure:  Free imax, ilen, old a, old j.
1896      Allocate bdiag, solve_work, new a, new j */
1897   ierr     = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1898   b->maxnz = b->nz = bdiag[0]+1;
1899 
1900   (fact)->info.factor_mallocs    = reallocs;
1901   (fact)->info.fill_ratio_given  = f;
1902   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1903   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1904   if (a->inode.size) {
1905     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1906   }
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1912 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1913 {
1914   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1915   IS                 isicol;
1916   PetscErrorCode     ierr;
1917   const PetscInt     *r,*ic;
1918   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1919   PetscInt           *bi,*cols,nnz,*cols_lvl;
1920   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1921   PetscInt           i,levels,diagonal_fill;
1922   PetscBool          col_identity,row_identity;
1923   PetscReal          f;
1924   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1925   PetscBT            lnkbt;
1926   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1927   PetscFreeSpaceList free_space    =PETSC_NULL,current_space=PETSC_NULL;
1928   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1929   PetscBool          missing;
1930 
1931   PetscFunctionBegin;
1932   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1933   f             = info->fill;
1934   levels        = (PetscInt)info->levels;
1935   diagonal_fill = (PetscInt)info->diagonal_fill;
1936 
1937   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1938 
1939   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1940   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1941   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1942     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1943 
1944     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1945     if (a->inode.size) {
1946       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1947     }
1948     fact->factortype               = MAT_FACTOR_ILU;
1949     (fact)->info.factor_mallocs    = 0;
1950     (fact)->info.fill_ratio_given  = info->fill;
1951     (fact)->info.fill_ratio_needed = 1.0;
1952 
1953     b    = (Mat_SeqAIJ*)(fact)->data;
1954     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1955     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1956     b->row  = isrow;
1957     b->col  = iscol;
1958     b->icol = isicol;
1959     ierr    = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1960     ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1961     ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1962     PetscFunctionReturn(0);
1963   }
1964 
1965   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1966   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1967 
1968   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1969   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1970   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1971   bi[0] = bdiag[0] = 0;
1972 
1973   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1974 
1975   /* create a linked list for storing column indices of the active row */
1976   nlnk = n + 1;
1977   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1978 
1979   /* initial FreeSpace size is f*(ai[n]+1) */
1980   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1981   current_space     = free_space;
1982   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1983   current_space_lvl = free_space_lvl;
1984 
1985   for (i=0; i<n; i++) {
1986     nzi = 0;
1987     /* copy current row into linked list */
1988     nnz = ai[r[i]+1] - ai[r[i]];
1989     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1990     cols   = aj + ai[r[i]];
1991     lnk[i] = -1; /* marker to indicate if diagonal exists */
1992     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1993     nzi   += nlnk;
1994 
1995     /* make sure diagonal entry is included */
1996     if (diagonal_fill && lnk[i] == -1) {
1997       fm = n;
1998       while (lnk[fm] < i) fm = lnk[fm];
1999       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
2000       lnk[fm]    = i;
2001       lnk_lvl[i] = 0;
2002       nzi++; dcount++;
2003     }
2004 
2005     /* add pivot rows into the active row */
2006     nzbd = 0;
2007     prow = lnk[n];
2008     while (prow < i) {
2009       nnz      = bdiag[prow];
2010       cols     = bj_ptr[prow] + nnz + 1;
2011       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2012       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
2013       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
2014       nzi     += nlnk;
2015       prow     = lnk[prow];
2016       nzbd++;
2017     }
2018     bdiag[i] = nzbd;
2019     bi[i+1]  = bi[i] + nzi;
2020 
2021     /* if free space is not available, make more free space */
2022     if (current_space->local_remaining<nzi) {
2023       nnz  = nzi*(n - i); /* estimated and max additional space needed */
2024       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2025       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2026       reallocs++;
2027     }
2028 
2029     /* copy data into free_space and free_space_lvl, then initialize lnk */
2030     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2031     bj_ptr[i]    = current_space->array;
2032     bjlvl_ptr[i] = current_space_lvl->array;
2033 
2034     /* make sure the active row i has diagonal entry */
2035     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2036 
2037     current_space->array               += nzi;
2038     current_space->local_used          += nzi;
2039     current_space->local_remaining     -= nzi;
2040     current_space_lvl->array           += nzi;
2041     current_space_lvl->local_used      += nzi;
2042     current_space_lvl->local_remaining -= nzi;
2043   }
2044 
2045   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2046   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2047 
2048   /* destroy list of free space and other temporary arrays */
2049   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2050   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2051   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2052   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2053   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2054 
2055 #if defined(PETSC_USE_INFO)
2056   {
2057     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2058     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2059     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2060     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2061     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2062     if (diagonal_fill) {
2063       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2064     }
2065   }
2066 #endif
2067 
2068   /* put together the new matrix */
2069   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2070   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2071   b    = (Mat_SeqAIJ*)(fact)->data;
2072 
2073   b->free_a       = PETSC_TRUE;
2074   b->free_ij      = PETSC_TRUE;
2075   b->singlemalloc = PETSC_FALSE;
2076 
2077   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2078   b->j = bj;
2079   b->i = bi;
2080   for (i=0; i<n; i++) bdiag[i] += bi[i];
2081   b->diag = bdiag;
2082   b->ilen = 0;
2083   b->imax = 0;
2084   b->row  = isrow;
2085   b->col  = iscol;
2086   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2087   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2088   b->icol = isicol;
2089   ierr    = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2090   /* In b structure:  Free imax, ilen, old a, old j.
2091      Allocate bdiag, solve_work, new a, new j */
2092   ierr     = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2093   b->maxnz = b->nz = bi[n];
2094 
2095   (fact)->info.factor_mallocs    = reallocs;
2096   (fact)->info.fill_ratio_given  = f;
2097   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2098   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2099   if (a->inode.size) {
2100     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2107 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2108 {
2109   Mat            C = B;
2110   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2111   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2112   IS             ip=b->row,iip = b->icol;
2113   PetscErrorCode ierr;
2114   const PetscInt *rip,*riip;
2115   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2116   PetscInt       *ai=a->i,*aj=a->j;
2117   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2118   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2119   PetscBool      perm_identity;
2120   FactorShiftCtx sctx;
2121   PetscReal      rs;
2122   MatScalar      d,*v;
2123 
2124   PetscFunctionBegin;
2125   /* MatPivotSetUp(): initialize shift context sctx */
2126   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2127 
2128   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2129     sctx.shift_top = info->zeropivot;
2130     for (i=0; i<mbs; i++) {
2131       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2132       d  = (aa)[a->diag[i]];
2133       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2134       v  = aa+ai[i];
2135       nz = ai[i+1] - ai[i];
2136       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2137       if (rs>sctx.shift_top) sctx.shift_top = rs;
2138     }
2139     sctx.shift_top *= 1.1;
2140     sctx.nshift_max = 5;
2141     sctx.shift_lo   = 0.;
2142     sctx.shift_hi   = 1.;
2143   }
2144 
2145   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2146   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2147 
2148   /* allocate working arrays
2149      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2150      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2151   */
2152   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2153 
2154   do {
2155     sctx.newshift = PETSC_FALSE;
2156 
2157     for (i=0; i<mbs; i++) c2r[i] = mbs;
2158     if (mbs) il[0] = 0;
2159 
2160     for (k = 0; k<mbs; k++) {
2161       /* zero rtmp */
2162       nz    = bi[k+1] - bi[k];
2163       bjtmp = bj + bi[k];
2164       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2165 
2166       /* load in initial unfactored row */
2167       bval = ba + bi[k];
2168       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2169       for (j = jmin; j < jmax; j++) {
2170         col = riip[aj[j]];
2171         if (col >= k) { /* only take upper triangular entry */
2172           rtmp[col] = aa[j];
2173           *bval++   = 0.0; /* for in-place factorization */
2174         }
2175       }
2176       /* shift the diagonal of the matrix: ZeropivotApply() */
2177       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2178 
2179       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2180       dk = rtmp[k];
2181       i  = c2r[k]; /* first row to be added to k_th row  */
2182 
2183       while (i < k) {
2184         nexti = c2r[i]; /* next row to be added to k_th row */
2185 
2186         /* compute multiplier, update diag(k) and U(i,k) */
2187         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2188         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2189         dk     += uikdi*ba[ili]; /* update diag[k] */
2190         ba[ili] = uikdi; /* -U(i,k) */
2191 
2192         /* add multiple of row i to k-th row */
2193         jmin = ili + 1; jmax = bi[i+1];
2194         if (jmin < jmax) {
2195           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2196           /* update il and c2r for row i */
2197           il[i] = jmin;
2198           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2199         }
2200         i = nexti;
2201       }
2202 
2203       /* copy data into U(k,:) */
2204       rs   = 0.0;
2205       jmin = bi[k]; jmax = bi[k+1]-1;
2206       if (jmin < jmax) {
2207         for (j=jmin; j<jmax; j++) {
2208           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2209         }
2210         /* add the k-th row into il and c2r */
2211         il[k] = jmin;
2212         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2213       }
2214 
2215       /* MatPivotCheck() */
2216       sctx.rs = rs;
2217       sctx.pv = dk;
2218       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2219       if (sctx.newshift) break;
2220       dk = sctx.pv;
2221 
2222       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2223     }
2224   } while (sctx.newshift);
2225 
2226   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2227   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2228   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2229 
2230   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2231   if (perm_identity) {
2232     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2233     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2234     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2235     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2236   } else {
2237     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2238     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2239     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2240     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2241   }
2242 
2243   C->assembled    = PETSC_TRUE;
2244   C->preallocated = PETSC_TRUE;
2245 
2246   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2247 
2248   /* MatPivotView() */
2249   if (sctx.nshift) {
2250     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2251       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2252     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2253       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2254     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2255       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2256     }
2257   }
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2263 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2264 {
2265   Mat            C = B;
2266   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2267   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2268   IS             ip=b->row,iip = b->icol;
2269   PetscErrorCode ierr;
2270   const PetscInt *rip,*riip;
2271   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2272   PetscInt       *ai=a->i,*aj=a->j;
2273   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2274   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2275   PetscBool      perm_identity;
2276   FactorShiftCtx sctx;
2277   PetscReal      rs;
2278   MatScalar      d,*v;
2279 
2280   PetscFunctionBegin;
2281   /* MatPivotSetUp(): initialize shift context sctx */
2282   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2283 
2284   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2285     sctx.shift_top = info->zeropivot;
2286     for (i=0; i<mbs; i++) {
2287       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2288       d  = (aa)[a->diag[i]];
2289       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2290       v  = aa+ai[i];
2291       nz = ai[i+1] - ai[i];
2292       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2293       if (rs>sctx.shift_top) sctx.shift_top = rs;
2294     }
2295     sctx.shift_top *= 1.1;
2296     sctx.nshift_max = 5;
2297     sctx.shift_lo   = 0.;
2298     sctx.shift_hi   = 1.;
2299   }
2300 
2301   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2302   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2303 
2304   /* initialization */
2305   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2306 
2307   do {
2308     sctx.newshift = PETSC_FALSE;
2309 
2310     for (i=0; i<mbs; i++) jl[i] = mbs;
2311     il[0] = 0;
2312 
2313     for (k = 0; k<mbs; k++) {
2314       /* zero rtmp */
2315       nz    = bi[k+1] - bi[k];
2316       bjtmp = bj + bi[k];
2317       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2318 
2319       bval = ba + bi[k];
2320       /* initialize k-th row by the perm[k]-th row of A */
2321       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2322       for (j = jmin; j < jmax; j++) {
2323         col = riip[aj[j]];
2324         if (col >= k) { /* only take upper triangular entry */
2325           rtmp[col] = aa[j];
2326           *bval++   = 0.0; /* for in-place factorization */
2327         }
2328       }
2329       /* shift the diagonal of the matrix */
2330       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2331 
2332       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2333       dk = rtmp[k];
2334       i  = jl[k]; /* first row to be added to k_th row  */
2335 
2336       while (i < k) {
2337         nexti = jl[i]; /* next row to be added to k_th row */
2338 
2339         /* compute multiplier, update diag(k) and U(i,k) */
2340         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2341         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2342         dk     += uikdi*ba[ili];
2343         ba[ili] = uikdi; /* -U(i,k) */
2344 
2345         /* add multiple of row i to k-th row */
2346         jmin = ili + 1; jmax = bi[i+1];
2347         if (jmin < jmax) {
2348           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2349           /* update il and jl for row i */
2350           il[i] = jmin;
2351           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2352         }
2353         i = nexti;
2354       }
2355 
2356       /* shift the diagonals when zero pivot is detected */
2357       /* compute rs=sum of abs(off-diagonal) */
2358       rs   = 0.0;
2359       jmin = bi[k]+1;
2360       nz   = bi[k+1] - jmin;
2361       bcol = bj + jmin;
2362       for (j=0; j<nz; j++) {
2363         rs += PetscAbsScalar(rtmp[bcol[j]]);
2364       }
2365 
2366       sctx.rs = rs;
2367       sctx.pv = dk;
2368       ierr    = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2369       if (sctx.newshift) break;
2370       dk = sctx.pv;
2371 
2372       /* copy data into U(k,:) */
2373       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2374       jmin      = bi[k]+1; jmax = bi[k+1];
2375       if (jmin < jmax) {
2376         for (j=jmin; j<jmax; j++) {
2377           col = bj[j]; ba[j] = rtmp[col];
2378         }
2379         /* add the k-th row into il and jl */
2380         il[k] = jmin;
2381         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2382       }
2383     }
2384   } while (sctx.newshift);
2385 
2386   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2387   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2388   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2389 
2390   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2391   if (perm_identity) {
2392     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2393     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2394     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2395     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2396   } else {
2397     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2398     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2399     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2400     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2401   }
2402 
2403   C->assembled    = PETSC_TRUE;
2404   C->preallocated = PETSC_TRUE;
2405 
2406   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2407   if (sctx.nshift) {
2408     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2409       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2410     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2411       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2412     }
2413   }
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 /*
2418    icc() under revised new data structure.
2419    Factored arrays bj and ba are stored as
2420      U(0,:),...,U(i,:),U(n-1,:)
2421 
2422    ui=fact->i is an array of size n+1, in which
2423    ui+
2424      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2425      ui[n]:  points to U(n-1,n-1)+1
2426 
2427   udiag=fact->diag is an array of size n,in which
2428      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2429 
2430    U(i,:) contains udiag[i] as its last entry, i.e.,
2431     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2432 */
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2436 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2437 {
2438   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2439   Mat_SeqSBAIJ       *b;
2440   PetscErrorCode     ierr;
2441   PetscBool          perm_identity,missing;
2442   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2443   const PetscInt     *rip,*riip;
2444   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2445   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2446   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2447   PetscReal          fill          =info->fill,levels=info->levels;
2448   PetscFreeSpaceList free_space    =PETSC_NULL,current_space=PETSC_NULL;
2449   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2450   PetscBT            lnkbt;
2451   IS                 iperm;
2452 
2453   PetscFunctionBegin;
2454   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2455   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2456   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2457   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2458   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2459 
2460   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2461   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2462   ui[0] = 0;
2463 
2464   /* ICC(0) without matrix ordering: simply rearrange column indices */
2465   if (!levels && perm_identity) {
2466     for (i=0; i<am; i++) {
2467       ncols    = ai[i+1] - a->diag[i];
2468       ui[i+1]  = ui[i] + ncols;
2469       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2470     }
2471     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2472     cols = uj;
2473     for (i=0; i<am; i++) {
2474       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2475       ncols = ai[i+1] - a->diag[i] -1;
2476       for (j=0; j<ncols; j++) *cols++ = aj[j];
2477       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2478     }
2479   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2480     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2481     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2482 
2483     /* initialization */
2484     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2485 
2486     /* jl: linked list for storing indices of the pivot rows
2487        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2488     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2489     for (i=0; i<am; i++) {
2490       jl[i] = am; il[i] = 0;
2491     }
2492 
2493     /* create and initialize a linked list for storing column indices of the active row k */
2494     nlnk = am + 1;
2495     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2496 
2497     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2498     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2499     current_space     = free_space;
2500     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2501     current_space_lvl = free_space_lvl;
2502 
2503     for (k=0; k<am; k++) {  /* for each active row k */
2504       /* initialize lnk by the column indices of row rip[k] of A */
2505       nzk   = 0;
2506       ncols = ai[rip[k]+1] - ai[rip[k]];
2507       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2508       ncols_upper = 0;
2509       for (j=0; j<ncols; j++) {
2510         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2511         if (riip[i] >= k) { /* only take upper triangular entry */
2512           ajtmp[ncols_upper] = i;
2513           ncols_upper++;
2514         }
2515       }
2516       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2517       nzk += nlnk;
2518 
2519       /* update lnk by computing fill-in for each pivot row to be merged in */
2520       prow = jl[k]; /* 1st pivot row */
2521 
2522       while (prow < k) {
2523         nextprow = jl[prow];
2524 
2525         /* merge prow into k-th row */
2526         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2527         jmax  = ui[prow+1];
2528         ncols = jmax-jmin;
2529         i     = jmin - ui[prow];
2530         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2531         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2532         j     = *(uj - 1);
2533         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2534         nzk  += nlnk;
2535 
2536         /* update il and jl for prow */
2537         if (jmin < jmax) {
2538           il[prow] = jmin;
2539           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2540         }
2541         prow = nextprow;
2542       }
2543 
2544       /* if free space is not available, make more free space */
2545       if (current_space->local_remaining<nzk) {
2546         i    = am - k + 1; /* num of unfactored rows */
2547         i   *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2548         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2549         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2550         reallocs++;
2551       }
2552 
2553       /* copy data into free_space and free_space_lvl, then initialize lnk */
2554       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2555       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2556 
2557       /* add the k-th row into il and jl */
2558       if (nzk > 1) {
2559         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2560         jl[k] = jl[i]; jl[i] = k;
2561         il[k] = ui[k] + 1;
2562       }
2563       uj_ptr[k]     = current_space->array;
2564       uj_lvl_ptr[k] = current_space_lvl->array;
2565 
2566       current_space->array           += nzk;
2567       current_space->local_used      += nzk;
2568       current_space->local_remaining -= nzk;
2569 
2570       current_space_lvl->array           += nzk;
2571       current_space_lvl->local_used      += nzk;
2572       current_space_lvl->local_remaining -= nzk;
2573 
2574       ui[k+1] = ui[k] + nzk;
2575     }
2576 
2577     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2578     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2579     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2580     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2581 
2582     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2583     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2584     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2585     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2586     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2587 
2588   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2589 
2590   /* put together the new matrix in MATSEQSBAIJ format */
2591   b               = (Mat_SeqSBAIJ*)(fact)->data;
2592   b->singlemalloc = PETSC_FALSE;
2593 
2594   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2595 
2596   b->j             = uj;
2597   b->i             = ui;
2598   b->diag          = udiag;
2599   b->free_diag     = PETSC_TRUE;
2600   b->ilen          = 0;
2601   b->imax          = 0;
2602   b->row           = perm;
2603   b->col           = perm;
2604   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2605   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2606   b->icol          = iperm;
2607   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2608 
2609   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2610   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2611 
2612   b->maxnz   = b->nz = ui[am];
2613   b->free_a  = PETSC_TRUE;
2614   b->free_ij = PETSC_TRUE;
2615 
2616   fact->info.factor_mallocs   = reallocs;
2617   fact->info.fill_ratio_given = fill;
2618   if (ai[am] != 0) {
2619     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2620     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2621   } else {
2622     fact->info.fill_ratio_needed = 0.0;
2623   }
2624 #if defined(PETSC_USE_INFO)
2625   if (ai[am] != 0) {
2626     PetscReal af = fact->info.fill_ratio_needed;
2627     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2628     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2629     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2630   } else {
2631     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2632   }
2633 #endif
2634   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2640 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2641 {
2642   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2643   Mat_SeqSBAIJ       *b;
2644   PetscErrorCode     ierr;
2645   PetscBool          perm_identity,missing;
2646   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2647   const PetscInt     *rip,*riip;
2648   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2649   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2650   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2651   PetscReal          fill          =info->fill,levels=info->levels;
2652   PetscFreeSpaceList free_space    =PETSC_NULL,current_space=PETSC_NULL;
2653   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2654   PetscBT            lnkbt;
2655   IS                 iperm;
2656 
2657   PetscFunctionBegin;
2658   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2659   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2660   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2661   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2662   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2663 
2664   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2665   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2666   ui[0] = 0;
2667 
2668   /* ICC(0) without matrix ordering: simply copies fill pattern */
2669   if (!levels && perm_identity) {
2670 
2671     for (i=0; i<am; i++) {
2672       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2673       udiag[i] = ui[i];
2674     }
2675     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2676     cols = uj;
2677     for (i=0; i<am; i++) {
2678       aj    = a->j + a->diag[i];
2679       ncols = ui[i+1] - ui[i];
2680       for (j=0; j<ncols; j++) *cols++ = *aj++;
2681     }
2682   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2683     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2684     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2685 
2686     /* initialization */
2687     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2688 
2689     /* jl: linked list for storing indices of the pivot rows
2690        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2691     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2692     for (i=0; i<am; i++) {
2693       jl[i] = am; il[i] = 0;
2694     }
2695 
2696     /* create and initialize a linked list for storing column indices of the active row k */
2697     nlnk = am + 1;
2698     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2699 
2700     /* initial FreeSpace size is fill*(ai[am]+1) */
2701     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2702     current_space     = free_space;
2703     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2704     current_space_lvl = free_space_lvl;
2705 
2706     for (k=0; k<am; k++) {  /* for each active row k */
2707       /* initialize lnk by the column indices of row rip[k] of A */
2708       nzk   = 0;
2709       ncols = ai[rip[k]+1] - ai[rip[k]];
2710       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2711       ncols_upper = 0;
2712       for (j=0; j<ncols; j++) {
2713         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2714         if (riip[i] >= k) { /* only take upper triangular entry */
2715           ajtmp[ncols_upper] = i;
2716           ncols_upper++;
2717         }
2718       }
2719       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2720       nzk += nlnk;
2721 
2722       /* update lnk by computing fill-in for each pivot row to be merged in */
2723       prow = jl[k]; /* 1st pivot row */
2724 
2725       while (prow < k) {
2726         nextprow = jl[prow];
2727 
2728         /* merge prow into k-th row */
2729         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2730         jmax  = ui[prow+1];
2731         ncols = jmax-jmin;
2732         i     = jmin - ui[prow];
2733         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2734         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2735         j     = *(uj - 1);
2736         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2737         nzk  += nlnk;
2738 
2739         /* update il and jl for prow */
2740         if (jmin < jmax) {
2741           il[prow] = jmin;
2742           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2743         }
2744         prow = nextprow;
2745       }
2746 
2747       /* if free space is not available, make more free space */
2748       if (current_space->local_remaining<nzk) {
2749         i    = am - k + 1; /* num of unfactored rows */
2750         i   *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2751         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2752         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2753         reallocs++;
2754       }
2755 
2756       /* copy data into free_space and free_space_lvl, then initialize lnk */
2757       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2758       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2759 
2760       /* add the k-th row into il and jl */
2761       if (nzk > 1) {
2762         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2763         jl[k] = jl[i]; jl[i] = k;
2764         il[k] = ui[k] + 1;
2765       }
2766       uj_ptr[k]     = current_space->array;
2767       uj_lvl_ptr[k] = current_space_lvl->array;
2768 
2769       current_space->array           += nzk;
2770       current_space->local_used      += nzk;
2771       current_space->local_remaining -= nzk;
2772 
2773       current_space_lvl->array           += nzk;
2774       current_space_lvl->local_used      += nzk;
2775       current_space_lvl->local_remaining -= nzk;
2776 
2777       ui[k+1] = ui[k] + nzk;
2778     }
2779 
2780 #if defined(PETSC_USE_INFO)
2781     if (ai[am] != 0) {
2782       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2783       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2784       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2785       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2786     } else {
2787       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2788     }
2789 #endif
2790 
2791     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2792     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2793     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2794     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2795 
2796     /* destroy list of free space and other temporary array(s) */
2797     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2798     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2799     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2800     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2801 
2802   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2803 
2804   /* put together the new matrix in MATSEQSBAIJ format */
2805 
2806   b                = (Mat_SeqSBAIJ*)fact->data;
2807   b->singlemalloc  = PETSC_FALSE;
2808 
2809   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2810 
2811   b->j         = uj;
2812   b->i         = ui;
2813   b->diag      = udiag;
2814   b->free_diag = PETSC_TRUE;
2815   b->ilen      = 0;
2816   b->imax      = 0;
2817   b->row       = perm;
2818   b->col       = perm;
2819 
2820   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2821   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2822 
2823   b->icol          = iperm;
2824   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2825   ierr             = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2826   ierr             = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2827   b->maxnz         = b->nz = ui[am];
2828   b->free_a        = PETSC_TRUE;
2829   b->free_ij       = PETSC_TRUE;
2830 
2831   fact->info.factor_mallocs   = reallocs;
2832   fact->info.fill_ratio_given = fill;
2833   if (ai[am] != 0) {
2834     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2835   } else {
2836     fact->info.fill_ratio_needed = 0.0;
2837   }
2838   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2844 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2845 {
2846   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2847   Mat_SeqSBAIJ       *b;
2848   PetscErrorCode     ierr;
2849   PetscBool          perm_identity;
2850   PetscReal          fill = info->fill;
2851   const PetscInt     *rip,*riip;
2852   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2853   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2854   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2855   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2856   PetscBT            lnkbt;
2857   IS                 iperm;
2858 
2859   PetscFunctionBegin;
2860   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2861   /* check whether perm is the identity mapping */
2862   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2863   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2864   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2865   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2866 
2867   /* initialization */
2868   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2869   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2870   ui[0] = 0;
2871 
2872   /* jl: linked list for storing indices of the pivot rows
2873      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2874   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2875   for (i=0; i<am; i++) {
2876     jl[i] = am; il[i] = 0;
2877   }
2878 
2879   /* create and initialize a linked list for storing column indices of the active row k */
2880   nlnk = am + 1;
2881   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2882 
2883   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2884   ierr          = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2885   current_space = free_space;
2886 
2887   for (k=0; k<am; k++) {  /* for each active row k */
2888     /* initialize lnk by the column indices of row rip[k] of A */
2889     nzk   = 0;
2890     ncols = ai[rip[k]+1] - ai[rip[k]];
2891     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2892     ncols_upper = 0;
2893     for (j=0; j<ncols; j++) {
2894       i = riip[*(aj + ai[rip[k]] + j)];
2895       if (i >= k) { /* only take upper triangular entry */
2896         cols[ncols_upper] = i;
2897         ncols_upper++;
2898       }
2899     }
2900     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2901     nzk += nlnk;
2902 
2903     /* update lnk by computing fill-in for each pivot row to be merged in */
2904     prow = jl[k]; /* 1st pivot row */
2905 
2906     while (prow < k) {
2907       nextprow = jl[prow];
2908       /* merge prow into k-th row */
2909       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2910       jmax   = ui[prow+1];
2911       ncols  = jmax-jmin;
2912       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2913       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2914       nzk   += nlnk;
2915 
2916       /* update il and jl for prow */
2917       if (jmin < jmax) {
2918         il[prow] = jmin;
2919         j        = *uj_ptr;
2920         jl[prow] = jl[j];
2921         jl[j]    = prow;
2922       }
2923       prow = nextprow;
2924     }
2925 
2926     /* if free space is not available, make more free space */
2927     if (current_space->local_remaining<nzk) {
2928       i    = am - k + 1; /* num of unfactored rows */
2929       i   *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2930       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2931       reallocs++;
2932     }
2933 
2934     /* copy data into free space, then initialize lnk */
2935     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2936 
2937     /* add the k-th row into il and jl */
2938     if (nzk > 1) {
2939       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2940       jl[k] = jl[i]; jl[i] = k;
2941       il[k] = ui[k] + 1;
2942     }
2943     ui_ptr[k] = current_space->array;
2944 
2945     current_space->array           += nzk;
2946     current_space->local_used      += nzk;
2947     current_space->local_remaining -= nzk;
2948 
2949     ui[k+1] = ui[k] + nzk;
2950   }
2951 
2952   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2953   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2954   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2955 
2956   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2957   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2958   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2959   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2960 
2961   /* put together the new matrix in MATSEQSBAIJ format */
2962 
2963   b               = (Mat_SeqSBAIJ*)fact->data;
2964   b->singlemalloc = PETSC_FALSE;
2965   b->free_a       = PETSC_TRUE;
2966   b->free_ij      = PETSC_TRUE;
2967 
2968   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2969 
2970   b->j         = uj;
2971   b->i         = ui;
2972   b->diag      = udiag;
2973   b->free_diag = PETSC_TRUE;
2974   b->ilen      = 0;
2975   b->imax      = 0;
2976   b->row       = perm;
2977   b->col       = perm;
2978   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2979   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2980 
2981   b->icol          = iperm;
2982   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2983   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2984   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2985 
2986   b->maxnz = b->nz = ui[am];
2987 
2988   fact->info.factor_mallocs   = reallocs;
2989   fact->info.fill_ratio_given = fill;
2990   if (ai[am] != 0) {
2991     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2992     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2993   } else {
2994     fact->info.fill_ratio_needed = 0.0;
2995   }
2996 #if defined(PETSC_USE_INFO)
2997   if (ai[am] != 0) {
2998     PetscReal af = fact->info.fill_ratio_needed;
2999     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3000     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3001     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3002   } else {
3003     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3004   }
3005 #endif
3006   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
3012 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
3013 {
3014   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
3015   Mat_SeqSBAIJ       *b;
3016   PetscErrorCode     ierr;
3017   PetscBool          perm_identity;
3018   PetscReal          fill = info->fill;
3019   const PetscInt     *rip,*riip;
3020   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3021   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3022   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3023   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
3024   PetscBT            lnkbt;
3025   IS                 iperm;
3026 
3027   PetscFunctionBegin;
3028   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3029   /* check whether perm is the identity mapping */
3030   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
3031   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
3032   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
3033   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
3034 
3035   /* initialization */
3036   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
3037   ui[0] = 0;
3038 
3039   /* jl: linked list for storing indices of the pivot rows
3040      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3041   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
3042   for (i=0; i<am; i++) {
3043     jl[i] = am; il[i] = 0;
3044   }
3045 
3046   /* create and initialize a linked list for storing column indices of the active row k */
3047   nlnk = am + 1;
3048   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3049 
3050   /* initial FreeSpace size is fill*(ai[am]+1) */
3051   ierr          = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3052   current_space = free_space;
3053 
3054   for (k=0; k<am; k++) {  /* for each active row k */
3055     /* initialize lnk by the column indices of row rip[k] of A */
3056     nzk   = 0;
3057     ncols = ai[rip[k]+1] - ai[rip[k]];
3058     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3059     ncols_upper = 0;
3060     for (j=0; j<ncols; j++) {
3061       i = riip[*(aj + ai[rip[k]] + j)];
3062       if (i >= k) { /* only take upper triangular entry */
3063         cols[ncols_upper] = i;
3064         ncols_upper++;
3065       }
3066     }
3067     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3068     nzk += nlnk;
3069 
3070     /* update lnk by computing fill-in for each pivot row to be merged in */
3071     prow = jl[k]; /* 1st pivot row */
3072 
3073     while (prow < k) {
3074       nextprow = jl[prow];
3075       /* merge prow into k-th row */
3076       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3077       jmax   = ui[prow+1];
3078       ncols  = jmax-jmin;
3079       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3080       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3081       nzk   += nlnk;
3082 
3083       /* update il and jl for prow */
3084       if (jmin < jmax) {
3085         il[prow] = jmin;
3086         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3087       }
3088       prow = nextprow;
3089     }
3090 
3091     /* if free space is not available, make more free space */
3092     if (current_space->local_remaining<nzk) {
3093       i    = am - k + 1; /* num of unfactored rows */
3094       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3095       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3096       reallocs++;
3097     }
3098 
3099     /* copy data into free space, then initialize lnk */
3100     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3101 
3102     /* add the k-th row into il and jl */
3103     if (nzk-1 > 0) {
3104       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3105       jl[k] = jl[i]; jl[i] = k;
3106       il[k] = ui[k] + 1;
3107     }
3108     ui_ptr[k] = current_space->array;
3109 
3110     current_space->array           += nzk;
3111     current_space->local_used      += nzk;
3112     current_space->local_remaining -= nzk;
3113 
3114     ui[k+1] = ui[k] + nzk;
3115   }
3116 
3117 #if defined(PETSC_USE_INFO)
3118   if (ai[am] != 0) {
3119     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3120     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3121     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3122     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3123   } else {
3124     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3125   }
3126 #endif
3127 
3128   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3129   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3130   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3131 
3132   /* destroy list of free space and other temporary array(s) */
3133   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3134   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3135   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3136 
3137   /* put together the new matrix in MATSEQSBAIJ format */
3138 
3139   b               = (Mat_SeqSBAIJ*)fact->data;
3140   b->singlemalloc = PETSC_FALSE;
3141   b->free_a       = PETSC_TRUE;
3142   b->free_ij      = PETSC_TRUE;
3143 
3144   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3145 
3146   b->j    = uj;
3147   b->i    = ui;
3148   b->diag = 0;
3149   b->ilen = 0;
3150   b->imax = 0;
3151   b->row  = perm;
3152   b->col  = perm;
3153 
3154   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3155   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3156 
3157   b->icol          = iperm;
3158   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3159 
3160   ierr     = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3161   ierr     = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3162   b->maxnz = b->nz = ui[am];
3163 
3164   fact->info.factor_mallocs   = reallocs;
3165   fact->info.fill_ratio_given = fill;
3166   if (ai[am] != 0) {
3167     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3168   } else {
3169     fact->info.fill_ratio_needed = 0.0;
3170   }
3171   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3177 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3178 {
3179   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3180   PetscErrorCode    ierr;
3181   PetscInt          n   = A->rmap->n;
3182   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3183   PetscScalar       *x,sum;
3184   const PetscScalar *b;
3185   const MatScalar   *aa = a->a,*v;
3186   PetscInt          i,nz;
3187 
3188   PetscFunctionBegin;
3189   if (!n) PetscFunctionReturn(0);
3190 
3191   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3193 
3194   /* forward solve the lower triangular */
3195   x[0] = b[0];
3196   v    = aa;
3197   vi   = aj;
3198   for (i=1; i<n; i++) {
3199     nz  = ai[i+1] - ai[i];
3200     sum = b[i];
3201     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3202     v   += nz;
3203     vi  += nz;
3204     x[i] = sum;
3205   }
3206 
3207   /* backward solve the upper triangular */
3208   for (i=n-1; i>=0; i--) {
3209     v   = aa + adiag[i+1] + 1;
3210     vi  = aj + adiag[i+1] + 1;
3211     nz  = adiag[i] - adiag[i+1]-1;
3212     sum = x[i];
3213     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3214     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3215   }
3216 
3217   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3218   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3219   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3220   PetscFunctionReturn(0);
3221 }
3222 
3223 #undef __FUNCT__
3224 #define __FUNCT__ "MatSolve_SeqAIJ"
3225 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3226 {
3227   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3228   IS                iscol = a->col,isrow = a->row;
3229   PetscErrorCode    ierr;
3230   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3231   const PetscInt    *rout,*cout,*r,*c;
3232   PetscScalar       *x,*tmp,sum;
3233   const PetscScalar *b;
3234   const MatScalar   *aa = a->a,*v;
3235 
3236   PetscFunctionBegin;
3237   if (!n) PetscFunctionReturn(0);
3238 
3239   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3240   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3241   tmp  = a->solve_work;
3242 
3243   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3244   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3245 
3246   /* forward solve the lower triangular */
3247   tmp[0] = b[r[0]];
3248   v      = aa;
3249   vi     = aj;
3250   for (i=1; i<n; i++) {
3251     nz  = ai[i+1] - ai[i];
3252     sum = b[r[i]];
3253     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3254     tmp[i] = sum;
3255     v     += nz; vi += nz;
3256   }
3257 
3258   /* backward solve the upper triangular */
3259   for (i=n-1; i>=0; i--) {
3260     v   = aa + adiag[i+1]+1;
3261     vi  = aj + adiag[i+1]+1;
3262     nz  = adiag[i]-adiag[i+1]-1;
3263     sum = tmp[i];
3264     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3265     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3266   }
3267 
3268   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3269   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3270   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3271   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3272   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3278 /*
3279     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3280 */
3281 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3282 {
3283   Mat            B = *fact;
3284   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3285   IS             isicol;
3286   PetscErrorCode ierr;
3287   const PetscInt *r,*ic;
3288   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3289   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3290   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3291   PetscInt       nlnk,*lnk;
3292   PetscBT        lnkbt;
3293   PetscBool      row_identity,icol_identity;
3294   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3295   const PetscInt *ics;
3296   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3297   PetscReal      dt     =info->dt,shift=info->shiftamount;
3298   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3299   PetscBool      missing;
3300 
3301   PetscFunctionBegin;
3302   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3303   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3304 
3305   /* ------- symbolic factorization, can be reused ---------*/
3306   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3307   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3308   adiag=a->diag;
3309 
3310   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3311 
3312   /* bdiag is location of diagonal in factor */
3313   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3314   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3315 
3316   /* allocate row pointers bi */
3317   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3318 
3319   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3320   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3321   nnz_max = ai[n]+2*n*dtcount+2;
3322 
3323   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3324   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3325 
3326   /* put together the new matrix */
3327   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3328   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3329   b    = (Mat_SeqAIJ*)B->data;
3330 
3331   b->free_a       = PETSC_TRUE;
3332   b->free_ij      = PETSC_TRUE;
3333   b->singlemalloc = PETSC_FALSE;
3334 
3335   b->a          = ba;
3336   b->j          = bj;
3337   b->i          = bi;
3338   b->diag       = bdiag;
3339   b->ilen       = 0;
3340   b->imax       = 0;
3341   b->row        = isrow;
3342   b->col        = iscol;
3343   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3344   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3345   b->icol       = isicol;
3346 
3347   ierr     = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3348   ierr     = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3349   b->maxnz = nnz_max;
3350 
3351   B->factortype            = MAT_FACTOR_ILUDT;
3352   B->info.factor_mallocs   = 0;
3353   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3354   CHKMEMQ;
3355   /* ------- end of symbolic factorization ---------*/
3356 
3357   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3358   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3359   ics  = ic;
3360 
3361   /* linked list for storing column indices of the active row */
3362   nlnk = n + 1;
3363   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3364 
3365   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3366   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3367   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3368   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3369   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3370 
3371   bi[0]        = 0;
3372   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3373   bdiag_rev[n] = bdiag[0];
3374   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3375   for (i=0; i<n; i++) {
3376     /* copy initial fill into linked list */
3377     nzi = ai[r[i]+1] - ai[r[i]];
3378     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3379     nzi_al = adiag[r[i]] - ai[r[i]];
3380     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3381     ajtmp  = aj + ai[r[i]];
3382     ierr   = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3383 
3384     /* load in initial (unfactored row) */
3385     aatmp = a->a + ai[r[i]];
3386     for (j=0; j<nzi; j++) {
3387       rtmp[ics[*ajtmp++]] = *aatmp++;
3388     }
3389 
3390     /* add pivot rows into linked list */
3391     row = lnk[n];
3392     while (row < i) {
3393       nzi_bl = bi[row+1] - bi[row] + 1;
3394       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3395       ierr   = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3396       nzi   += nlnk;
3397       row    = lnk[row];
3398     }
3399 
3400     /* copy data from lnk into jtmp, then initialize lnk */
3401     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3402 
3403     /* numerical factorization */
3404     bjtmp = jtmp;
3405     row   = *bjtmp++; /* 1st pivot row */
3406     while (row < i) {
3407       pc         = rtmp + row;
3408       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3409       multiplier = (*pc) * (*pv);
3410       *pc        = multiplier;
3411       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3412         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3413         pv = ba + bdiag[row+1] + 1;
3414         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3415         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3416         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3417         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3418       }
3419       row = *bjtmp++;
3420     }
3421 
3422     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3423     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3424     nzi_bl   = 0; j = 0;
3425     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3426       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3427       nzi_bl++; j++;
3428     }
3429     nzi_bu = nzi - nzi_bl -1;
3430     while (j < nzi) {
3431       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3432       j++;
3433     }
3434 
3435     bjtmp = bj + bi[i];
3436     batmp = ba + bi[i];
3437     /* apply level dropping rule to L part */
3438     ncut = nzi_al + dtcount;
3439     if (ncut < nzi_bl) {
3440       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3441       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3442     } else {
3443       ncut = nzi_bl;
3444     }
3445     for (j=0; j<ncut; j++) {
3446       bjtmp[j] = jtmp[j];
3447       batmp[j] = vtmp[j];
3448       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3449     }
3450     bi[i+1] = bi[i] + ncut;
3451     nzi     = ncut + 1;
3452 
3453     /* apply level dropping rule to U part */
3454     ncut = nzi_au + dtcount;
3455     if (ncut < nzi_bu) {
3456       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3457       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3458     } else {
3459       ncut = nzi_bu;
3460     }
3461     nzi += ncut;
3462 
3463     /* mark bdiagonal */
3464     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3465     bdiag_rev[n-i-1] = bdiag[i+1];
3466     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3467     bjtmp            = bj + bdiag[i];
3468     batmp            = ba + bdiag[i];
3469     *bjtmp           = i;
3470     *batmp           = diag_tmp; /* rtmp[i]; */
3471     if (*batmp == 0.0) {
3472       *batmp = dt+shift;
3473       /* printf(" row %d add shift %g\n",i,shift); */
3474     }
3475     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3476     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3477 
3478     bjtmp = bj + bdiag[i+1]+1;
3479     batmp = ba + bdiag[i+1]+1;
3480     for (k=0; k<ncut; k++) {
3481       bjtmp[k] = jtmp[nzi_bl+1+k];
3482       batmp[k] = vtmp[nzi_bl+1+k];
3483       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3484     }
3485     /* printf("\n"); */
3486 
3487     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3488     /*
3489     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3490     printf(" ----------------------------\n");
3491     */
3492   } /* for (i=0; i<n; i++) */
3493     /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3494   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3495 
3496   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3497   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3498 
3499   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3500   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3501   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3502   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3503 
3504   ierr     = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3505   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3506 
3507   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3508   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3509   if (row_identity && icol_identity) {
3510     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3511   } else {
3512     B->ops->solve = MatSolve_SeqAIJ;
3513   }
3514 
3515   B->ops->solveadd          = 0;
3516   B->ops->solvetranspose    = 0;
3517   B->ops->solvetransposeadd = 0;
3518   B->ops->matsolve          = 0;
3519   B->assembled              = PETSC_TRUE;
3520   B->preallocated           = PETSC_TRUE;
3521   PetscFunctionReturn(0);
3522 }
3523 
3524 /* a wraper of MatILUDTFactor_SeqAIJ() */
3525 #undef __FUNCT__
3526 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3527 /*
3528     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3529 */
3530 
3531 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3532 {
3533   PetscErrorCode ierr;
3534 
3535   PetscFunctionBegin;
3536   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 /*
3541    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3542    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3543 */
3544 #undef __FUNCT__
3545 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3546 /*
3547     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3548 */
3549 
3550 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3551 {
3552   Mat            C     =fact;
3553   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3554   IS             isrow = b->row,isicol = b->icol;
3555   PetscErrorCode ierr;
3556   const PetscInt *r,*ic,*ics;
3557   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3558   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3559   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3560   PetscReal      dt=info->dt,shift=info->shiftamount;
3561   PetscBool      row_identity, col_identity;
3562 
3563   PetscFunctionBegin;
3564   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3565   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3566   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3567   ics  = ic;
3568 
3569   for (i=0; i<n; i++) {
3570     /* initialize rtmp array */
3571     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3572     bjtmp = bj + bi[i];
3573     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3574     rtmp[i] = 0.0;
3575     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3576     bjtmp   = bj + bdiag[i+1] + 1;
3577     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3578 
3579     /* load in initial unfactored row of A */
3580     /* printf("row %d\n",i); */
3581     nz    = ai[r[i]+1] - ai[r[i]];
3582     ajtmp = aj + ai[r[i]];
3583     v     = aa + ai[r[i]];
3584     for (j=0; j<nz; j++) {
3585       rtmp[ics[*ajtmp++]] = v[j];
3586       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3587     }
3588     /* printf("\n"); */
3589 
3590     /* numerical factorization */
3591     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3592     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3593     k     = 0;
3594     while (k < nzl) {
3595       row = *bjtmp++;
3596       /* printf("  prow %d\n",row); */
3597       pc         = rtmp + row;
3598       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3599       multiplier = (*pc) * (*pv);
3600       *pc        = multiplier;
3601       if (PetscAbsScalar(multiplier) > dt) {
3602         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3603         pv = b->a + bdiag[row+1] + 1;
3604         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3605         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3606         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3607       }
3608       k++;
3609     }
3610 
3611     /* finished row so stick it into b->a */
3612     /* L-part */
3613     pv  = b->a + bi[i];
3614     pj  = bj + bi[i];
3615     nzl = bi[i+1] - bi[i];
3616     for (j=0; j<nzl; j++) {
3617       pv[j] = rtmp[pj[j]];
3618       /* printf(" (%d,%g),",pj[j],pv[j]); */
3619     }
3620 
3621     /* diagonal: invert diagonal entries for simplier triangular solves */
3622     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3623     b->a[bdiag[i]] = 1.0/rtmp[i];
3624     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3625 
3626     /* U-part */
3627     pv  = b->a + bdiag[i+1] + 1;
3628     pj  = bj + bdiag[i+1] + 1;
3629     nzu = bdiag[i] - bdiag[i+1] - 1;
3630     for (j=0; j<nzu; j++) {
3631       pv[j] = rtmp[pj[j]];
3632       /* printf(" (%d,%g),",pj[j],pv[j]); */
3633     }
3634     /* printf("\n"); */
3635   }
3636 
3637   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3638   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3639   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3640 
3641   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3642   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3643   if (row_identity && col_identity) {
3644     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3645   } else {
3646     C->ops->solve = MatSolve_SeqAIJ;
3647   }
3648   C->ops->solveadd          = 0;
3649   C->ops->solvetranspose    = 0;
3650   C->ops->solvetransposeadd = 0;
3651   C->ops->matsolve          = 0;
3652   C->assembled              = PETSC_TRUE;
3653   C->preallocated           = PETSC_TRUE;
3654 
3655   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3656   PetscFunctionReturn(0);
3657 }
3658