xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 0e9bae810fdaeb60e2713eaa8ddb89f42e079fd1)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
10 /*
11       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12 
13       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14 */
15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16 {
17   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
18   PetscErrorCode    ierr;
19   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20   const PetscInt    *ai = a->i, *aj = a->j;
21   const PetscScalar *aa = a->a;
22   PetscBool         *done;
23   PetscReal         best,past = 0,future;
24 
25   PetscFunctionBegin;
26   /* pick initial row */
27   best = -1;
28   for (i=0; i<n; i++) {
29     future = 0.0;
30     for (j=ai[i]; j<ai[i+1]; j++) {
31       if (aj[j] != i) future  += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best = past/future;
36       current = i;
37     }
38   }
39 
40   ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
41   ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future  += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk]) past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future  += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current = newcurrent;
84     order[i+1] = current;
85   }
86   ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr = PetscFree(done);CHKERRQ(ierr);
90   ierr = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 EXTERN_C_BEGIN
96 #undef __FUNCT__
97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
99 {
100   PetscFunctionBegin;
101   *flg = PETSC_TRUE;
102   PetscFunctionReturn(0);
103 }
104 EXTERN_C_END
105 
106 EXTERN_C_BEGIN
107 #undef __FUNCT__
108 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110 {
111   PetscInt           n = A->rmap->n;
112   PetscErrorCode     ierr;
113 
114   PetscFunctionBegin;
115 #if defined(PETSC_USE_COMPLEX)
116   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
117 #endif
118   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
119   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
120   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
121     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
122     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
123     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
124     ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
125   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
126     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
127     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
128     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
131   (*B)->factortype = ftype;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
138 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
139 {
140   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
141   IS                 isicol;
142   PetscErrorCode     ierr;
143   const PetscInt     *r,*ic;
144   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
145   PetscInt           *bi,*bj,*ajtmp;
146   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
147   PetscReal          f;
148   PetscInt           nlnk,*lnk,k,**bi_ptr;
149   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
150   PetscBT            lnkbt;
151 
152   PetscFunctionBegin;
153   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
154   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
155   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
156   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
157 
158   /* get new row pointers */
159   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
160   bi[0] = 0;
161 
162   /* bdiag is location of diagonal in factor */
163   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
164   bdiag[0] = 0;
165 
166   /* linked list for storing column indices of the active row */
167   nlnk = n + 1;
168   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
169 
170   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
171 
172   /* initial FreeSpace size is f*(ai[n]+1) */
173   f = info->fill;
174   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
175   current_space = free_space;
176 
177   for (i=0; i<n; i++) {
178     /* copy previous fill into linked list */
179     nzi = 0;
180     nnz = ai[r[i]+1] - ai[r[i]];
181     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
182     ajtmp = aj + ai[r[i]];
183     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
184     nzi += nlnk;
185 
186     /* add pivot rows into linked list */
187     row = lnk[n];
188     while (row < i) {
189       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
190       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
191       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
192       nzi += nlnk;
193       row  = lnk[row];
194     }
195     bi[i+1] = bi[i] + nzi;
196     im[i]   = nzi;
197 
198     /* mark bdiag */
199     nzbd = 0;
200     nnz  = nzi;
201     k    = lnk[n];
202     while (nnz-- && k < i){
203       nzbd++;
204       k = lnk[k];
205     }
206     bdiag[i] = bi[i] + nzbd;
207 
208     /* if free space is not available, make more free space */
209     if (current_space->local_remaining<nzi) {
210       nnz = (n - i)*nzi; /* estimated and max additional space needed */
211       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
212       reallocs++;
213     }
214 
215     /* copy data into free space, then initialize lnk */
216     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
217     bi_ptr[i] = current_space->array;
218     current_space->array           += nzi;
219     current_space->local_used      += nzi;
220     current_space->local_remaining -= nzi;
221   }
222 #if defined(PETSC_USE_INFO)
223   if (ai[n] != 0) {
224     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
225     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
226     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
227     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
228     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
229   } else {
230     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
231   }
232 #endif
233 
234   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
235   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
236 
237   /* destroy list of free space and other temporary array(s) */
238   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
239   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
240   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
241   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
242 
243   /* put together the new matrix */
244   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
245   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
246   b    = (Mat_SeqAIJ*)(B)->data;
247   b->free_a       = PETSC_TRUE;
248   b->free_ij      = PETSC_TRUE;
249   b->singlemalloc = PETSC_FALSE;
250   ierr          = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
251   b->j          = bj;
252   b->i          = bi;
253   b->diag       = bdiag;
254   b->ilen       = 0;
255   b->imax       = 0;
256   b->row        = isrow;
257   b->col        = iscol;
258   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
259   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
260   b->icol       = isicol;
261   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
262 
263   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
264   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
265   b->maxnz = b->nz = bi[n] ;
266 
267   (B)->factortype            = MAT_FACTOR_LU;
268   (B)->info.factor_mallocs   = reallocs;
269   (B)->info.fill_ratio_given = f;
270 
271   if (ai[n]) {
272     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
273   } else {
274     (B)->info.fill_ratio_needed = 0.0;
275   }
276   (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_inplace;
277   if (a->inode.size) {
278     (B)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
285 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
286 {
287   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
288   IS                 isicol;
289   PetscErrorCode     ierr;
290   const PetscInt     *r,*ic;
291   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
292   PetscInt           *bi,*bj,*ajtmp;
293   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
294   PetscReal          f;
295   PetscInt           nlnk,*lnk,k,**bi_ptr;
296   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
297   PetscBT            lnkbt;
298 
299   PetscFunctionBegin;
300   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
301   /*
302   PetscBool          olddatastruct=PETSC_FALSE;
303   ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
304   if(olddatastruct){
305     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
306     PetscFunctionReturn(0);
307   }
308   */
309   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
310   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
311   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
312   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
313 
314   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
315   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
316   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
317   bi[0] = bdiag[0] = 0;
318 
319   /* linked list for storing column indices of the active row */
320   nlnk = n + 1;
321   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
322 
323   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
324 
325   /* initial FreeSpace size is f*(ai[n]+1) */
326   f = info->fill;
327   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
328   current_space = free_space;
329 
330   for (i=0; i<n; i++) {
331     /* copy previous fill into linked list */
332     nzi = 0;
333     nnz = ai[r[i]+1] - ai[r[i]];
334     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
335     ajtmp = aj + ai[r[i]];
336     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
337     nzi += nlnk;
338 
339     /* add pivot rows into linked list */
340     row = lnk[n];
341     while (row < i){
342       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
343       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
344       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
345       nzi  += nlnk;
346       row   = lnk[row];
347     }
348     bi[i+1] = bi[i] + nzi;
349     im[i]   = nzi;
350 
351     /* mark bdiag */
352     nzbd = 0;
353     nnz  = nzi;
354     k    = lnk[n];
355     while (nnz-- && k < i){
356       nzbd++;
357       k = lnk[k];
358     }
359     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
360 
361     /* if free space is not available, make more free space */
362     if (current_space->local_remaining<nzi) {
363       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
364       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
365       reallocs++;
366     }
367 
368     /* copy data into free space, then initialize lnk */
369     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
370     bi_ptr[i] = current_space->array;
371     current_space->array           += nzi;
372     current_space->local_used      += nzi;
373     current_space->local_remaining -= nzi;
374   }
375 
376   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
377   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
378 
379   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
380   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
381   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
382   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
383   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
384 
385   /* put together the new matrix */
386   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
387   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
388   b    = (Mat_SeqAIJ*)(B)->data;
389   b->free_a       = PETSC_TRUE;
390   b->free_ij      = PETSC_TRUE;
391   b->singlemalloc = PETSC_FALSE;
392   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
393   b->j          = bj;
394   b->i          = bi;
395   b->diag       = bdiag;
396   b->ilen       = 0;
397   b->imax       = 0;
398   b->row        = isrow;
399   b->col        = iscol;
400   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
401   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
402   b->icol       = isicol;
403   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
404 
405   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
406   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
407   b->maxnz = b->nz = bdiag[0]+1;
408   B->factortype            = MAT_FACTOR_LU;
409   B->info.factor_mallocs   = reallocs;
410   B->info.fill_ratio_given = f;
411 
412   if (ai[n]) {
413     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
414   } else {
415     B->info.fill_ratio_needed = 0.0;
416   }
417 #if defined(PETSC_USE_INFO)
418   if (ai[n] != 0) {
419     PetscReal af = B->info.fill_ratio_needed;
420     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
421     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
422     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
423     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
424   } else {
425     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
426   }
427 #endif
428   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
429   if (a->inode.size) {
430     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
431   }
432   PetscFunctionReturn(0);
433 }
434 
435 /*
436     Trouble in factorization, should we dump the original matrix?
437 */
438 #undef __FUNCT__
439 #define __FUNCT__ "MatFactorDumpMatrix"
440 PetscErrorCode MatFactorDumpMatrix(Mat A)
441 {
442   PetscErrorCode ierr;
443   PetscBool      flg = PETSC_FALSE;
444 
445   PetscFunctionBegin;
446   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr);
447   if (flg) {
448     PetscViewer viewer;
449     char        filename[PETSC_MAX_PATH_LEN];
450 
451     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
452     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
453     ierr = MatView(A,viewer);CHKERRQ(ierr);
454     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
455   }
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
461 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
462 {
463   Mat              C=B;
464   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
465   IS               isrow = b->row,isicol = b->icol;
466   PetscErrorCode   ierr;
467   const PetscInt   *r,*ic,*ics;
468   const PetscInt   n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
469   PetscInt         i,j,k,nz,nzL,row,*pj;
470   const PetscInt   *ajtmp,*bjtmp;
471   MatScalar        *rtmp,*pc,multiplier,*pv;
472   const  MatScalar *aa=a->a,*v;
473   PetscBool        row_identity,col_identity;
474   FactorShiftCtx   sctx;
475   const PetscInt   *ddiag;
476   PetscReal        rs;
477   MatScalar        d;
478 
479   PetscFunctionBegin;
480   /* MatPivotSetUp(): initialize shift context sctx */
481   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
482 
483   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
484     ddiag          = a->diag;
485     sctx.shift_top = info->zeropivot;
486     for (i=0; i<n; i++) {
487       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
488       d  = (aa)[ddiag[i]];
489       rs = -PetscAbsScalar(d) - PetscRealPart(d);
490       v  = aa+ai[i];
491       nz = ai[i+1] - ai[i];
492       for (j=0; j<nz; j++)
493 	rs += PetscAbsScalar(v[j]);
494       if (rs>sctx.shift_top) sctx.shift_top = rs;
495     }
496     sctx.shift_top   *= 1.1;
497     sctx.nshift_max   = 5;
498     sctx.shift_lo     = 0.;
499     sctx.shift_hi     = 1.;
500   }
501 
502   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
503   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
504   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
505   ics  = ic;
506 
507   do {
508     sctx.newshift = PETSC_FALSE;
509     for (i=0; i<n; i++){
510       /* zero rtmp */
511       /* L part */
512       nz    = bi[i+1] - bi[i];
513       bjtmp = bj + bi[i];
514       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
515 
516       /* U part */
517       nz = bdiag[i]-bdiag[i+1];
518       bjtmp = bj + bdiag[i+1]+1;
519       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
520 
521       /* load in initial (unfactored row) */
522       nz    = ai[r[i]+1] - ai[r[i]];
523       ajtmp = aj + ai[r[i]];
524       v     = aa + ai[r[i]];
525       for (j=0; j<nz; j++) {
526         rtmp[ics[ajtmp[j]]] = v[j];
527       }
528       /* ZeropivotApply() */
529       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
530 
531       /* elimination */
532       bjtmp = bj + bi[i];
533       row   = *bjtmp++;
534       nzL   = bi[i+1] - bi[i];
535       for(k=0; k < nzL;k++) {
536         pc = rtmp + row;
537         if (*pc != 0.0) {
538           pv         = b->a + bdiag[row];
539           multiplier = *pc * (*pv);
540           *pc        = multiplier;
541           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
542 	  pv = b->a + bdiag[row+1]+1;
543 	  nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
544           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
545           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
546         }
547         row = *bjtmp++;
548       }
549 
550       /* finished row so stick it into b->a */
551       rs = 0.0;
552       /* L part */
553       pv   = b->a + bi[i] ;
554       pj   = b->j + bi[i] ;
555       nz   = bi[i+1] - bi[i];
556       for (j=0; j<nz; j++) {
557         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
558       }
559 
560       /* U part */
561       pv = b->a + bdiag[i+1]+1;
562       pj = b->j + bdiag[i+1]+1;
563       nz = bdiag[i] - bdiag[i+1]-1;
564       for (j=0; j<nz; j++) {
565         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
566       }
567 
568       sctx.rs  = rs;
569       sctx.pv  = rtmp[i];
570       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
571       if(sctx.newshift) break; /* break for-loop */
572       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
573 
574       /* Mark diagonal and invert diagonal for simplier triangular solves */
575       pv  = b->a + bdiag[i];
576       *pv = 1.0/rtmp[i];
577 
578     } /* endof for (i=0; i<n; i++){ */
579 
580     /* MatPivotRefine() */
581     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
582       /*
583        * if no shift in this attempt & shifting & started shifting & can refine,
584        * then try lower shift
585        */
586       sctx.shift_hi       = sctx.shift_fraction;
587       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
588       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
589       sctx.newshift       = PETSC_TRUE;
590       sctx.nshift++;
591     }
592   } while (sctx.newshift);
593 
594   ierr = PetscFree(rtmp);CHKERRQ(ierr);
595   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
596   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
597 
598   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
599   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
600   if (row_identity && col_identity) {
601     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
602   } else {
603     C->ops->solve = MatSolve_SeqAIJ;
604   }
605   C->ops->solveadd           = MatSolveAdd_SeqAIJ;
606   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ;
607   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ;
608   C->ops->matsolve           = MatMatSolve_SeqAIJ;
609   C->assembled    = PETSC_TRUE;
610   C->preallocated = PETSC_TRUE;
611   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
612 
613   /* MatShiftView(A,info,&sctx) */
614   if (sctx.nshift){
615     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
616       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
617     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
618       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
619     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
620       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
621     }
622   }
623   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNCT__
628 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
629 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
630 {
631   Mat             C=B;
632   Mat_SeqAIJ      *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
633   IS              isrow = b->row,isicol = b->icol;
634   PetscErrorCode  ierr;
635   const PetscInt   *r,*ic,*ics;
636   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
637   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
638   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
639   MatScalar       *pv,*rtmp,*pc,multiplier,d;
640   const MatScalar *v,*aa=a->a;
641   PetscReal       rs=0.0;
642   FactorShiftCtx  sctx;
643   const PetscInt  *ddiag;
644   PetscBool       row_identity, col_identity;
645 
646   PetscFunctionBegin;
647   /* MatPivotSetUp(): initialize shift context sctx */
648   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
649 
650   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
651     ddiag          = a->diag;
652     sctx.shift_top = info->zeropivot;
653     for (i=0; i<n; i++) {
654       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
655       d  = (aa)[ddiag[i]];
656       rs = -PetscAbsScalar(d) - PetscRealPart(d);
657       v  = aa+ai[i];
658       nz = ai[i+1] - ai[i];
659       for (j=0; j<nz; j++)
660 	rs += PetscAbsScalar(v[j]);
661       if (rs>sctx.shift_top) sctx.shift_top = rs;
662     }
663     sctx.shift_top   *= 1.1;
664     sctx.nshift_max   = 5;
665     sctx.shift_lo     = 0.;
666     sctx.shift_hi     = 1.;
667   }
668 
669   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
670   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
671   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
672   ics  = ic;
673 
674   do {
675     sctx.newshift = PETSC_FALSE;
676     for (i=0; i<n; i++){
677       nz    = bi[i+1] - bi[i];
678       bjtmp = bj + bi[i];
679       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
680 
681       /* load in initial (unfactored row) */
682       nz    = ai[r[i]+1] - ai[r[i]];
683       ajtmp = aj + ai[r[i]];
684       v     = aa + ai[r[i]];
685       for (j=0; j<nz; j++) {
686         rtmp[ics[ajtmp[j]]] = v[j];
687       }
688       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
689 
690       row = *bjtmp++;
691       while  (row < i) {
692         pc = rtmp + row;
693         if (*pc != 0.0) {
694           pv         = b->a + diag_offset[row];
695           pj         = b->j + diag_offset[row] + 1;
696           multiplier = *pc / *pv++;
697           *pc        = multiplier;
698           nz         = bi[row+1] - diag_offset[row] - 1;
699           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
700           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
701         }
702         row = *bjtmp++;
703       }
704       /* finished row so stick it into b->a */
705       pv   = b->a + bi[i] ;
706       pj   = b->j + bi[i] ;
707       nz   = bi[i+1] - bi[i];
708       diag = diag_offset[i] - bi[i];
709       rs   = 0.0;
710       for (j=0; j<nz; j++) {
711         pv[j] = rtmp[pj[j]];
712         rs   += PetscAbsScalar(pv[j]);
713       }
714       rs   -= PetscAbsScalar(pv[diag]);
715 
716       sctx.rs  = rs;
717       sctx.pv  = pv[diag];
718       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
719       if (sctx.newshift) break;
720       pv[diag] = sctx.pv;
721     }
722 
723     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
724       /*
725        * if no shift in this attempt & shifting & started shifting & can refine,
726        * then try lower shift
727        */
728       sctx.shift_hi       = sctx.shift_fraction;
729       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
730       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
731       sctx.newshift       = PETSC_TRUE;
732       sctx.nshift++;
733     }
734   } while (sctx.newshift);
735 
736   /* invert diagonal entries for simplier triangular solves */
737   for (i=0; i<n; i++) {
738     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
739   }
740   ierr = PetscFree(rtmp);CHKERRQ(ierr);
741   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
742   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
743 
744   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
745   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
746   if (row_identity && col_identity) {
747     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering_inplace;
748   } else {
749     C->ops->solve   = MatSolve_SeqAIJ_inplace;
750   }
751   C->ops->solveadd           = MatSolveAdd_SeqAIJ_inplace;
752   C->ops->solvetranspose     = MatSolveTranspose_SeqAIJ_inplace;
753   C->ops->solvetransposeadd  = MatSolveTransposeAdd_SeqAIJ_inplace;
754   C->ops->matsolve           = MatMatSolve_SeqAIJ_inplace;
755   C->assembled    = PETSC_TRUE;
756   C->preallocated = PETSC_TRUE;
757   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
758   if (sctx.nshift){
759      if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
760       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
761     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
762       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
763     }
764   }
765   (C)->ops->solve            = MatSolve_SeqAIJ_inplace;
766   (C)->ops->solvetranspose   = MatSolveTranspose_SeqAIJ_inplace;
767   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 /*
772    This routine implements inplace ILU(0) with row or/and column permutations.
773    Input:
774      A - original matrix
775    Output;
776      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
777          a->j (col index) is permuted by the inverse of colperm, then sorted
778          a->a reordered accordingly with a->j
779          a->diag (ptr to diagonal elements) is updated.
780 */
781 #undef __FUNCT__
782 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
784 {
785   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
786   IS             isrow = a->row,isicol = a->icol;
787   PetscErrorCode ierr;
788   const PetscInt *r,*ic,*ics;
789   PetscInt       i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
790   PetscInt       *ajtmp,nz,row;
791   PetscInt       *diag = a->diag,nbdiag,*pj;
792   PetscScalar    *rtmp,*pc,multiplier,d;
793   MatScalar      *pv,*v;
794   PetscReal      rs;
795   FactorShiftCtx sctx;
796   const  MatScalar *aa=a->a,*vtmp;
797 
798   PetscFunctionBegin;
799   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
800 
801   /* MatPivotSetUp(): initialize shift context sctx */
802   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
803 
804   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
805     const PetscInt   *ddiag = a->diag;
806     sctx.shift_top = info->zeropivot;
807     for (i=0; i<n; i++) {
808       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
809       d  = (aa)[ddiag[i]];
810       rs = -PetscAbsScalar(d) - PetscRealPart(d);
811       vtmp  = aa+ai[i];
812       nz = ai[i+1] - ai[i];
813       for (j=0; j<nz; j++)
814 	rs += PetscAbsScalar(vtmp[j]);
815       if (rs>sctx.shift_top) sctx.shift_top = rs;
816     }
817     sctx.shift_top   *= 1.1;
818     sctx.nshift_max   = 5;
819     sctx.shift_lo     = 0.;
820     sctx.shift_hi     = 1.;
821   }
822 
823   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
824   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
825   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
826   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
827   ics = ic;
828 
829 #if defined(MV)
830   sctx.shift_top      = 0.;
831   sctx.nshift_max     = 0;
832   sctx.shift_lo       = 0.;
833   sctx.shift_hi       = 0.;
834   sctx.shift_fraction = 0.;
835 
836   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
837     sctx.shift_top = 0.;
838     for (i=0; i<n; i++) {
839       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
840       d  = (a->a)[diag[i]];
841       rs = -PetscAbsScalar(d) - PetscRealPart(d);
842       v  = a->a+ai[i];
843       nz = ai[i+1] - ai[i];
844       for (j=0; j<nz; j++)
845 	rs += PetscAbsScalar(v[j]);
846       if (rs>sctx.shift_top) sctx.shift_top = rs;
847     }
848     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
849     sctx.shift_top    *= 1.1;
850     sctx.nshift_max   = 5;
851     sctx.shift_lo     = 0.;
852     sctx.shift_hi     = 1.;
853   }
854 
855   sctx.shift_amount = 0.;
856   sctx.nshift       = 0;
857 #endif
858 
859   do {
860     sctx.newshift = PETSC_FALSE;
861     for (i=0; i<n; i++){
862       /* load in initial unfactored row */
863       nz    = ai[r[i]+1] - ai[r[i]];
864       ajtmp = aj + ai[r[i]];
865       v     = a->a + ai[r[i]];
866       /* sort permuted ajtmp and values v accordingly */
867       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
868       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
869 
870       diag[r[i]] = ai[r[i]];
871       for (j=0; j<nz; j++) {
872         rtmp[ajtmp[j]] = v[j];
873         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
874       }
875       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
876 
877       row = *ajtmp++;
878       while  (row < i) {
879         pc = rtmp + row;
880         if (*pc != 0.0) {
881           pv         = a->a + diag[r[row]];
882           pj         = aj + diag[r[row]] + 1;
883 
884           multiplier = *pc / *pv++;
885           *pc        = multiplier;
886           nz         = ai[r[row]+1] - diag[r[row]] - 1;
887           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
888           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
889         }
890         row = *ajtmp++;
891       }
892       /* finished row so overwrite it onto a->a */
893       pv   = a->a + ai[r[i]] ;
894       pj   = aj + ai[r[i]] ;
895       nz   = ai[r[i]+1] - ai[r[i]];
896       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
897 
898       rs   = 0.0;
899       for (j=0; j<nz; j++) {
900         pv[j] = rtmp[pj[j]];
901         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
902       }
903 
904       sctx.rs  = rs;
905       sctx.pv  = pv[nbdiag];
906       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
907       if (sctx.newshift) break;
908       pv[nbdiag] = sctx.pv;
909     }
910 
911     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
912       /*
913        * if no shift in this attempt & shifting & started shifting & can refine,
914        * then try lower shift
915        */
916       sctx.shift_hi        = sctx.shift_fraction;
917       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
918       sctx.shift_amount    = sctx.shift_fraction * sctx.shift_top;
919       sctx.newshift         = PETSC_TRUE;
920       sctx.nshift++;
921     }
922   } while (sctx.newshift);
923 
924   /* invert diagonal entries for simplier triangular solves */
925   for (i=0; i<n; i++) {
926     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
927   }
928 
929   ierr = PetscFree(rtmp);CHKERRQ(ierr);
930   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
931   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
932   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
933   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
934   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
935   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
936   A->assembled = PETSC_TRUE;
937   A->preallocated = PETSC_TRUE;
938   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
939   if (sctx.nshift){
940     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
941       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
942     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
943       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
944     }
945   }
946   PetscFunctionReturn(0);
947 }
948 
949 /* ----------------------------------------------------------- */
950 #undef __FUNCT__
951 #define __FUNCT__ "MatLUFactor_SeqAIJ"
952 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
953 {
954   PetscErrorCode ierr;
955   Mat            C;
956 
957   PetscFunctionBegin;
958   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
959   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
960   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
961   A->ops->solve            = C->ops->solve;
962   A->ops->solvetranspose   = C->ops->solvetranspose;
963   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
964   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 /* ----------------------------------------------------------- */
968 
969 
970 #undef __FUNCT__
971 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
972 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
973 {
974   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
975   IS                iscol = a->col,isrow = a->row;
976   PetscErrorCode    ierr;
977   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
978   PetscInt          nz;
979   const PetscInt    *rout,*cout,*r,*c;
980   PetscScalar       *x,*tmp,*tmps,sum;
981   const PetscScalar *b;
982   const MatScalar   *aa = a->a,*v;
983 
984   PetscFunctionBegin;
985   if (!n) PetscFunctionReturn(0);
986 
987   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
988   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
989   tmp  = a->solve_work;
990 
991   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
992   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
993 
994   /* forward solve the lower triangular */
995   tmp[0] = b[*r++];
996   tmps   = tmp;
997   for (i=1; i<n; i++) {
998     v   = aa + ai[i] ;
999     vi  = aj + ai[i] ;
1000     nz  = a->diag[i] - ai[i];
1001     sum = b[*r++];
1002     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1003     tmp[i] = sum;
1004   }
1005 
1006   /* backward solve the upper triangular */
1007   for (i=n-1; i>=0; i--){
1008     v   = aa + a->diag[i] + 1;
1009     vi  = aj + a->diag[i] + 1;
1010     nz  = ai[i+1] - a->diag[i] - 1;
1011     sum = tmp[i];
1012     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1013     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1014   }
1015 
1016   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1017   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1018   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1019   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1020   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1026 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1027 {
1028   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1029   IS              iscol = a->col,isrow = a->row;
1030   PetscErrorCode  ierr;
1031   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1032   PetscInt        nz,neq;
1033   const PetscInt  *rout,*cout,*r,*c;
1034   PetscScalar     *x,*b,*tmp,*tmps,sum;
1035   const MatScalar *aa = a->a,*v;
1036   PetscBool       bisdense,xisdense;
1037 
1038   PetscFunctionBegin;
1039   if (!n) PetscFunctionReturn(0);
1040 
1041   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1042   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1043   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1044   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1045 
1046   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1047   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1048 
1049   tmp  = a->solve_work;
1050   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1051   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1052 
1053   for (neq=0; neq<B->cmap->n; neq++){
1054     /* forward solve the lower triangular */
1055     tmp[0] = b[r[0]];
1056     tmps   = tmp;
1057     for (i=1; i<n; i++) {
1058       v   = aa + ai[i] ;
1059       vi  = aj + ai[i] ;
1060       nz  = a->diag[i] - ai[i];
1061       sum = b[r[i]];
1062       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1063       tmp[i] = sum;
1064     }
1065     /* backward solve the upper triangular */
1066     for (i=n-1; i>=0; i--){
1067       v   = aa + a->diag[i] + 1;
1068       vi  = aj + a->diag[i] + 1;
1069       nz  = ai[i+1] - a->diag[i] - 1;
1070       sum = tmp[i];
1071       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1072       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1073     }
1074 
1075     b += n;
1076     x += n;
1077   }
1078   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1079   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1080   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1081   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1082   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1088 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1089 {
1090   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1091   IS              iscol = a->col,isrow = a->row;
1092   PetscErrorCode  ierr;
1093   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1094   PetscInt        nz,neq;
1095   const PetscInt  *rout,*cout,*r,*c;
1096   PetscScalar     *x,*b,*tmp,sum;
1097   const MatScalar *aa = a->a,*v;
1098   PetscBool       bisdense,xisdense;
1099 
1100   PetscFunctionBegin;
1101   if (!n) PetscFunctionReturn(0);
1102 
1103   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1104   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1105   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1106   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1107 
1108   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1109   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1110 
1111   tmp  = a->solve_work;
1112   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1113   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1114 
1115   for (neq=0; neq<B->cmap->n; neq++){
1116     /* forward solve the lower triangular */
1117     tmp[0] = b[r[0]];
1118     v      = aa;
1119     vi     = aj;
1120     for (i=1; i<n; i++) {
1121       nz  = ai[i+1] - ai[i];
1122       sum = b[r[i]];
1123       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1124       tmp[i] = sum;
1125       v += nz; vi += nz;
1126     }
1127 
1128     /* backward solve the upper triangular */
1129     for (i=n-1; i>=0; i--){
1130       v   = aa + adiag[i+1]+1;
1131       vi  = aj + adiag[i+1]+1;
1132       nz  = adiag[i]-adiag[i+1]-1;
1133       sum = tmp[i];
1134       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1135       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1136     }
1137 
1138     b += n;
1139     x += n;
1140   }
1141   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1142   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1143   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1144   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1145   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1151 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1152 {
1153   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
1154   IS              iscol = a->col,isrow = a->row;
1155   PetscErrorCode  ierr;
1156   const PetscInt  *r,*c,*rout,*cout;
1157   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1158   PetscInt        nz,row;
1159   PetscScalar     *x,*b,*tmp,*tmps,sum;
1160   const MatScalar *aa = a->a,*v;
1161 
1162   PetscFunctionBegin;
1163   if (!n) PetscFunctionReturn(0);
1164 
1165   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1166   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1167   tmp  = a->solve_work;
1168 
1169   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1170   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1171 
1172   /* forward solve the lower triangular */
1173   tmp[0] = b[*r++];
1174   tmps   = tmp;
1175   for (row=1; row<n; row++) {
1176     i   = rout[row]; /* permuted row */
1177     v   = aa + ai[i] ;
1178     vi  = aj + ai[i] ;
1179     nz  = a->diag[i] - ai[i];
1180     sum = b[*r++];
1181     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1182     tmp[row] = sum;
1183   }
1184 
1185   /* backward solve the upper triangular */
1186   for (row=n-1; row>=0; row--){
1187     i   = rout[row]; /* permuted row */
1188     v   = aa + a->diag[i] + 1;
1189     vi  = aj + a->diag[i] + 1;
1190     nz  = ai[i+1] - a->diag[i] - 1;
1191     sum = tmp[row];
1192     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1193     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1194   }
1195 
1196   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1197   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1198   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1200   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 /* ----------------------------------------------------------- */
1205 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1208 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1209 {
1210   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1211   PetscErrorCode    ierr;
1212   PetscInt          n = A->rmap->n;
1213   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1214   PetscScalar       *x;
1215   const PetscScalar *b;
1216   const MatScalar   *aa = a->a;
1217 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1218   PetscInt          adiag_i,i,nz,ai_i;
1219   const PetscInt    *vi;
1220   const MatScalar   *v;
1221   PetscScalar       sum;
1222 #endif
1223 
1224   PetscFunctionBegin;
1225   if (!n) PetscFunctionReturn(0);
1226 
1227   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1228   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1229 
1230 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1231   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1232 #else
1233   /* forward solve the lower triangular */
1234   x[0] = b[0];
1235   for (i=1; i<n; i++) {
1236     ai_i = ai[i];
1237     v    = aa + ai_i;
1238     vi   = aj + ai_i;
1239     nz   = adiag[i] - ai_i;
1240     sum  = b[i];
1241     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1242     x[i] = sum;
1243   }
1244 
1245   /* backward solve the upper triangular */
1246   for (i=n-1; i>=0; i--){
1247     adiag_i = adiag[i];
1248     v       = aa + adiag_i + 1;
1249     vi      = aj + adiag_i + 1;
1250     nz      = ai[i+1] - adiag_i - 1;
1251     sum     = x[i];
1252     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1253     x[i]    = sum*aa[adiag_i];
1254   }
1255 #endif
1256   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1257   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1258   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1264 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1265 {
1266   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1267   IS                iscol = a->col,isrow = a->row;
1268   PetscErrorCode    ierr;
1269   PetscInt          i, n = A->rmap->n,j;
1270   PetscInt          nz;
1271   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1272   PetscScalar       *x,*tmp,sum;
1273   const PetscScalar *b;
1274   const MatScalar   *aa = a->a,*v;
1275 
1276   PetscFunctionBegin;
1277   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1278 
1279   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1280   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1281   tmp  = a->solve_work;
1282 
1283   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1284   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1285 
1286   /* forward solve the lower triangular */
1287   tmp[0] = b[*r++];
1288   for (i=1; i<n; i++) {
1289     v   = aa + ai[i] ;
1290     vi  = aj + ai[i] ;
1291     nz  = a->diag[i] - ai[i];
1292     sum = b[*r++];
1293     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1294     tmp[i] = sum;
1295   }
1296 
1297   /* backward solve the upper triangular */
1298   for (i=n-1; i>=0; i--){
1299     v   = aa + a->diag[i] + 1;
1300     vi  = aj + a->diag[i] + 1;
1301     nz  = ai[i+1] - a->diag[i] - 1;
1302     sum = tmp[i];
1303     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1304     tmp[i] = sum*aa[a->diag[i]];
1305     x[*c--] += tmp[i];
1306   }
1307 
1308   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1309   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1310   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1311   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1312   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1313 
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1319 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1320 {
1321   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1322   IS                iscol = a->col,isrow = a->row;
1323   PetscErrorCode    ierr;
1324   PetscInt          i, n = A->rmap->n,j;
1325   PetscInt          nz;
1326   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1327   PetscScalar       *x,*tmp,sum;
1328   const PetscScalar *b;
1329   const MatScalar   *aa = a->a,*v;
1330 
1331   PetscFunctionBegin;
1332   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1333 
1334   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1335   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1336   tmp  = a->solve_work;
1337 
1338   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1339   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1340 
1341   /* forward solve the lower triangular */
1342   tmp[0] = b[r[0]];
1343   v      = aa;
1344   vi     = aj;
1345   for (i=1; i<n; i++) {
1346     nz  = ai[i+1] - ai[i];
1347     sum = b[r[i]];
1348     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1349     tmp[i] = sum;
1350     v += nz; vi += nz;
1351   }
1352 
1353   /* backward solve the upper triangular */
1354   v  = aa + adiag[n-1];
1355   vi = aj + adiag[n-1];
1356   for (i=n-1; i>=0; i--){
1357     nz  = adiag[i] - adiag[i+1] - 1;
1358     sum = tmp[i];
1359     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1360     tmp[i] = sum*v[nz];
1361     x[c[i]] += tmp[i];
1362     v += nz+1; vi += nz+1;
1363   }
1364 
1365   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1366   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1367   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1368   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1369   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1370 
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1376 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1377 {
1378   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1379   IS                iscol = a->col,isrow = a->row;
1380   PetscErrorCode    ierr;
1381   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1382   PetscInt          i,n = A->rmap->n,j;
1383   PetscInt          nz;
1384   PetscScalar       *x,*tmp,s1;
1385   const MatScalar   *aa = a->a,*v;
1386   const PetscScalar *b;
1387 
1388   PetscFunctionBegin;
1389   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1390   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1391   tmp  = a->solve_work;
1392 
1393   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1394   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1395 
1396   /* copy the b into temp work space according to permutation */
1397   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1398 
1399   /* forward solve the U^T */
1400   for (i=0; i<n; i++) {
1401     v   = aa + diag[i] ;
1402     vi  = aj + diag[i] + 1;
1403     nz  = ai[i+1] - diag[i] - 1;
1404     s1  = tmp[i];
1405     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1406     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1407     tmp[i] = s1;
1408   }
1409 
1410   /* backward solve the L^T */
1411   for (i=n-1; i>=0; i--){
1412     v   = aa + diag[i] - 1 ;
1413     vi  = aj + diag[i] - 1 ;
1414     nz  = diag[i] - ai[i];
1415     s1  = tmp[i];
1416     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1417   }
1418 
1419   /* copy tmp into x according to permutation */
1420   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1421 
1422   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1423   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1424   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1425   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1426 
1427   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1433 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1434 {
1435   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1436   IS                iscol = a->col,isrow = a->row;
1437   PetscErrorCode    ierr;
1438   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1439   PetscInt          i,n = A->rmap->n,j;
1440   PetscInt          nz;
1441   PetscScalar       *x,*tmp,s1;
1442   const MatScalar   *aa = a->a,*v;
1443   const PetscScalar *b;
1444 
1445   PetscFunctionBegin;
1446   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1447   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1448   tmp  = a->solve_work;
1449 
1450   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1451   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1452 
1453   /* copy the b into temp work space according to permutation */
1454   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1455 
1456   /* forward solve the U^T */
1457   for (i=0; i<n; i++) {
1458     v   = aa + adiag[i+1] + 1;
1459     vi  = aj + adiag[i+1] + 1;
1460     nz  = adiag[i] - adiag[i+1] - 1;
1461     s1  = tmp[i];
1462     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1463     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1464     tmp[i] = s1;
1465   }
1466 
1467   /* backward solve the L^T */
1468   for (i=n-1; i>=0; i--){
1469     v   = aa + ai[i];
1470     vi  = aj + ai[i];
1471     nz  = ai[i+1] - ai[i];
1472     s1  = tmp[i];
1473     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1474   }
1475 
1476   /* copy tmp into x according to permutation */
1477   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1478 
1479   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1480   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1481   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1482   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1483 
1484   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1490 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1491 {
1492   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1493   IS                iscol = a->col,isrow = a->row;
1494   PetscErrorCode    ierr;
1495   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1496   PetscInt          i,n = A->rmap->n,j;
1497   PetscInt          nz;
1498   PetscScalar       *x,*tmp,s1;
1499   const MatScalar   *aa = a->a,*v;
1500   const PetscScalar *b;
1501 
1502   PetscFunctionBegin;
1503   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1504   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1505   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1506   tmp  = a->solve_work;
1507 
1508   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1509   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1510 
1511   /* copy the b into temp work space according to permutation */
1512   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1513 
1514   /* forward solve the U^T */
1515   for (i=0; i<n; i++) {
1516     v   = aa + diag[i] ;
1517     vi  = aj + diag[i] + 1;
1518     nz  = ai[i+1] - diag[i] - 1;
1519     s1  = tmp[i];
1520     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1521     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1522     tmp[i] = s1;
1523   }
1524 
1525   /* backward solve the L^T */
1526   for (i=n-1; i>=0; i--){
1527     v   = aa + diag[i] - 1 ;
1528     vi  = aj + diag[i] - 1 ;
1529     nz  = diag[i] - ai[i];
1530     s1  = tmp[i];
1531     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1532   }
1533 
1534   /* copy tmp into x according to permutation */
1535   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1536 
1537   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1538   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1539   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1540   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1541 
1542   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1548 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1549 {
1550   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1551   IS                iscol = a->col,isrow = a->row;
1552   PetscErrorCode    ierr;
1553   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1554   PetscInt          i,n = A->rmap->n,j;
1555   PetscInt          nz;
1556   PetscScalar       *x,*tmp,s1;
1557   const MatScalar   *aa = a->a,*v;
1558   const PetscScalar *b;
1559 
1560   PetscFunctionBegin;
1561   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1562   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1563   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1564   tmp  = a->solve_work;
1565 
1566   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1567   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1568 
1569   /* copy the b into temp work space according to permutation */
1570   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1571 
1572   /* forward solve the U^T */
1573   for (i=0; i<n; i++) {
1574     v   = aa + adiag[i+1] + 1;
1575     vi  = aj + adiag[i+1] + 1;
1576     nz  = adiag[i] - adiag[i+1] - 1;
1577     s1  = tmp[i];
1578     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1579     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1580     tmp[i] = s1;
1581   }
1582 
1583 
1584   /* backward solve the L^T */
1585   for (i=n-1; i>=0; i--){
1586     v   = aa + ai[i] ;
1587     vi  = aj + ai[i];
1588     nz  = ai[i+1] - ai[i];
1589     s1  = tmp[i];
1590     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1591   }
1592 
1593   /* copy tmp into x according to permutation */
1594   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1595 
1596   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1597   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1598   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1599   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1600 
1601   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 /* ----------------------------------------------------------------*/
1606 
1607 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1608 
1609 /*
1610    ilu() under revised new data structure.
1611    Factored arrays bj and ba are stored as
1612      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1613 
1614    bi=fact->i is an array of size n+1, in which
1615    bi+
1616      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1617      bi[n]:  points to L(n-1,n-1)+1
1618 
1619   bdiag=fact->diag is an array of size n+1,in which
1620      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1621      bdiag[n]: points to entry of U(n-1,0)-1
1622 
1623    U(i,:) contains bdiag[i] as its last entry, i.e.,
1624     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1625 */
1626 #undef __FUNCT__
1627 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1628 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1629 {
1630 
1631   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1632   PetscErrorCode     ierr;
1633   const PetscInt     n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1634   PetscInt           i,j,k=0,nz,*bi,*bj,*bdiag;
1635   PetscBool          missing;
1636   IS                 isicol;
1637 
1638   PetscFunctionBegin;
1639   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1640   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1641   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1642   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1643   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1644   b    = (Mat_SeqAIJ*)(fact)->data;
1645 
1646   /* allocate matrix arrays for new data structure */
1647   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1648   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1649   b->singlemalloc = PETSC_TRUE;
1650   if (!b->diag){
1651     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1652     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1653   }
1654   bdiag = b->diag;
1655 
1656   if (n > 0) {
1657     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1658   }
1659 
1660   /* set bi and bj with new data structure */
1661   bi = b->i;
1662   bj = b->j;
1663 
1664   /* L part */
1665   bi[0] = 0;
1666   for (i=0; i<n; i++){
1667     nz = adiag[i] - ai[i];
1668     bi[i+1] = bi[i] + nz;
1669     aj = a->j + ai[i];
1670     for (j=0; j<nz; j++){
1671       /*   *bj = aj[j]; bj++; */
1672       bj[k++] = aj[j];
1673     }
1674   }
1675 
1676   /* U part */
1677   bdiag[n] = bi[n]-1;
1678   for (i=n-1; i>=0; i--){
1679     nz = ai[i+1] - adiag[i] - 1;
1680     aj = a->j + adiag[i] + 1;
1681     for (j=0; j<nz; j++){
1682       /*      *bj = aj[j]; bj++; */
1683       bj[k++] = aj[j];
1684     }
1685     /* diag[i] */
1686     /*    *bj = i; bj++; */
1687     bj[k++] = i;
1688     bdiag[i] = bdiag[i+1] + nz + 1;
1689   }
1690 
1691   fact->factortype             = MAT_FACTOR_ILU;
1692   fact->info.factor_mallocs    = 0;
1693   fact->info.fill_ratio_given  = info->fill;
1694   fact->info.fill_ratio_needed = 1.0;
1695   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1696 
1697   b       = (Mat_SeqAIJ*)(fact)->data;
1698   b->row  = isrow;
1699   b->col  = iscol;
1700   b->icol = isicol;
1701   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1702   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1703   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1709 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1710 {
1711   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1712   IS                 isicol;
1713   PetscErrorCode     ierr;
1714   const PetscInt     *r,*ic;
1715   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1716   PetscInt           *bi,*cols,nnz,*cols_lvl;
1717   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1718   PetscInt           i,levels,diagonal_fill;
1719   PetscBool          col_identity,row_identity;
1720   PetscReal          f;
1721   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1722   PetscBT            lnkbt;
1723   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1724   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1725   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1726 
1727   PetscFunctionBegin;
1728   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1729   /*
1730   PetscBool          olddatastruct=PETSC_FALSE;
1731   ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr);
1732   if(olddatastruct){
1733     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1734     PetscFunctionReturn(0);
1735   }
1736   */
1737 
1738   levels = (PetscInt)info->levels;
1739   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1740   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1741   if (!levels && row_identity && col_identity) {
1742     /* special case: ilu(0) with natural ordering */
1743     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1744     if (a->inode.size) {
1745       fact->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1746     }
1747     PetscFunctionReturn(0);
1748   }
1749 
1750   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1751   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1752   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1753   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1754 
1755   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1756   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1757   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1758   bi[0] = bdiag[0] = 0;
1759   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1760 
1761   /* create a linked list for storing column indices of the active row */
1762   nlnk = n + 1;
1763   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1764 
1765   /* initial FreeSpace size is f*(ai[n]+1) */
1766   f             = info->fill;
1767   diagonal_fill = (PetscInt)info->diagonal_fill;
1768   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1769   current_space = free_space;
1770   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1771   current_space_lvl = free_space_lvl;
1772   for (i=0; i<n; i++) {
1773     nzi = 0;
1774     /* copy current row into linked list */
1775     nnz  = ai[r[i]+1] - ai[r[i]];
1776     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1777     cols = aj + ai[r[i]];
1778     lnk[i] = -1; /* marker to indicate if diagonal exists */
1779     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1780     nzi += nlnk;
1781 
1782     /* make sure diagonal entry is included */
1783     if (diagonal_fill && lnk[i] == -1) {
1784       fm = n;
1785       while (lnk[fm] < i) fm = lnk[fm];
1786       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1787       lnk[fm]    = i;
1788       lnk_lvl[i] = 0;
1789       nzi++; dcount++;
1790     }
1791 
1792     /* add pivot rows into the active row */
1793     nzbd = 0;
1794     prow = lnk[n];
1795     while (prow < i) {
1796       nnz      = bdiag[prow];
1797       cols     = bj_ptr[prow] + nnz + 1;
1798       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1799       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1800       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1801       nzi += nlnk;
1802       prow = lnk[prow];
1803       nzbd++;
1804     }
1805     bdiag[i] = nzbd;
1806     bi[i+1]  = bi[i] + nzi;
1807     /* if free space is not available, make more free space */
1808     if (current_space->local_remaining<nzi) {
1809       nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1810       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1811       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1812       reallocs++;
1813     }
1814 
1815     /* copy data into free_space and free_space_lvl, then initialize lnk */
1816     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1817     bj_ptr[i]    = current_space->array;
1818     bjlvl_ptr[i] = current_space_lvl->array;
1819 
1820     /* make sure the active row i has diagonal entry */
1821     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1822 
1823     current_space->array           += nzi;
1824     current_space->local_used      += nzi;
1825     current_space->local_remaining -= nzi;
1826     current_space_lvl->array           += nzi;
1827     current_space_lvl->local_used      += nzi;
1828     current_space_lvl->local_remaining -= nzi;
1829   }
1830 
1831   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1832   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1833   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1834   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1835   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1836 
1837   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1838   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1839   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1840 
1841 #if defined(PETSC_USE_INFO)
1842   {
1843     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1844     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1845     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1846     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1847     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1848     if (diagonal_fill) {
1849       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1850     }
1851   }
1852 #endif
1853   /* put together the new matrix */
1854   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1855   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1856   b = (Mat_SeqAIJ*)(fact)->data;
1857   b->free_a       = PETSC_TRUE;
1858   b->free_ij      = PETSC_TRUE;
1859   b->singlemalloc = PETSC_FALSE;
1860   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1861   b->j          = bj;
1862   b->i          = bi;
1863   b->diag       = bdiag;
1864   b->ilen       = 0;
1865   b->imax       = 0;
1866   b->row        = isrow;
1867   b->col        = iscol;
1868   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1869   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1870   b->icol       = isicol;
1871   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1872   /* In b structure:  Free imax, ilen, old a, old j.
1873      Allocate bdiag, solve_work, new a, new j */
1874   ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1875   b->maxnz = b->nz = bdiag[0]+1;
1876   (fact)->info.factor_mallocs    = reallocs;
1877   (fact)->info.fill_ratio_given  = f;
1878   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1879   (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1880   if (a->inode.size) {
1881     (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode;
1882   }
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1888 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1889 {
1890   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1891   IS                 isicol;
1892   PetscErrorCode     ierr;
1893   const PetscInt     *r,*ic;
1894   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1895   PetscInt           *bi,*cols,nnz,*cols_lvl;
1896   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1897   PetscInt           i,levels,diagonal_fill;
1898   PetscBool          col_identity,row_identity;
1899   PetscReal          f;
1900   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1901   PetscBT            lnkbt;
1902   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1903   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1904   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1905   PetscBool          missing;
1906 
1907   PetscFunctionBegin;
1908   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1909   f             = info->fill;
1910   levels        = (PetscInt)info->levels;
1911   diagonal_fill = (PetscInt)info->diagonal_fill;
1912   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1913 
1914   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1915   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1916   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1917     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1918     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1919     if (a->inode.size) {
1920       (fact)->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1921     }
1922     fact->factortype = MAT_FACTOR_ILU;
1923     (fact)->info.factor_mallocs    = 0;
1924     (fact)->info.fill_ratio_given  = info->fill;
1925     (fact)->info.fill_ratio_needed = 1.0;
1926     b               = (Mat_SeqAIJ*)(fact)->data;
1927     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1928     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1929     b->row              = isrow;
1930     b->col              = iscol;
1931     b->icol             = isicol;
1932     ierr                = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1933     ierr                = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1934     ierr                = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1935     PetscFunctionReturn(0);
1936   }
1937 
1938   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1939   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1940 
1941   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1942   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1943   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1944   bi[0] = bdiag[0] = 0;
1945 
1946   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1947 
1948   /* create a linked list for storing column indices of the active row */
1949   nlnk = n + 1;
1950   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1951 
1952   /* initial FreeSpace size is f*(ai[n]+1) */
1953   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1954   current_space = free_space;
1955   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1956   current_space_lvl = free_space_lvl;
1957 
1958   for (i=0; i<n; i++) {
1959     nzi = 0;
1960     /* copy current row into linked list */
1961     nnz  = ai[r[i]+1] - ai[r[i]];
1962     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1963     cols = aj + ai[r[i]];
1964     lnk[i] = -1; /* marker to indicate if diagonal exists */
1965     ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1966     nzi += nlnk;
1967 
1968     /* make sure diagonal entry is included */
1969     if (diagonal_fill && lnk[i] == -1) {
1970       fm = n;
1971       while (lnk[fm] < i) fm = lnk[fm];
1972       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1973       lnk[fm]    = i;
1974       lnk_lvl[i] = 0;
1975       nzi++; dcount++;
1976     }
1977 
1978     /* add pivot rows into the active row */
1979     nzbd = 0;
1980     prow = lnk[n];
1981     while (prow < i) {
1982       nnz      = bdiag[prow];
1983       cols     = bj_ptr[prow] + nnz + 1;
1984       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1985       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1986       ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1987       nzi += nlnk;
1988       prow = lnk[prow];
1989       nzbd++;
1990     }
1991     bdiag[i] = nzbd;
1992     bi[i+1]  = bi[i] + nzi;
1993 
1994     /* if free space is not available, make more free space */
1995     if (current_space->local_remaining<nzi) {
1996       nnz = nzi*(n - i); /* estimated and max additional space needed */
1997       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1998       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1999       reallocs++;
2000     }
2001 
2002     /* copy data into free_space and free_space_lvl, then initialize lnk */
2003     ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2004     bj_ptr[i]    = current_space->array;
2005     bjlvl_ptr[i] = current_space_lvl->array;
2006 
2007     /* make sure the active row i has diagonal entry */
2008     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2009 
2010     current_space->array           += nzi;
2011     current_space->local_used      += nzi;
2012     current_space->local_remaining -= nzi;
2013     current_space_lvl->array           += nzi;
2014     current_space_lvl->local_used      += nzi;
2015     current_space_lvl->local_remaining -= nzi;
2016   }
2017 
2018   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2019   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2020 
2021   /* destroy list of free space and other temporary arrays */
2022   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2023   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2024   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2025   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2026   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2027 
2028 #if defined(PETSC_USE_INFO)
2029   {
2030     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2031     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2032     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2033     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2034     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2035     if (diagonal_fill) {
2036       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2037     }
2038   }
2039 #endif
2040 
2041   /* put together the new matrix */
2042   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
2043   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2044   b = (Mat_SeqAIJ*)(fact)->data;
2045   b->free_a       = PETSC_TRUE;
2046   b->free_ij      = PETSC_TRUE;
2047   b->singlemalloc = PETSC_FALSE;
2048   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2049   b->j          = bj;
2050   b->i          = bi;
2051   for (i=0; i<n; i++) bdiag[i] += bi[i];
2052   b->diag       = bdiag;
2053   b->ilen       = 0;
2054   b->imax       = 0;
2055   b->row        = isrow;
2056   b->col        = iscol;
2057   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2058   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2059   b->icol       = isicol;
2060   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2061   /* In b structure:  Free imax, ilen, old a, old j.
2062      Allocate bdiag, solve_work, new a, new j */
2063   ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2064   b->maxnz             = b->nz = bi[n] ;
2065   (fact)->info.factor_mallocs    = reallocs;
2066   (fact)->info.fill_ratio_given  = f;
2067   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2068   (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
2069   if (a->inode.size) {
2070     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2077 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2078 {
2079   Mat            C = B;
2080   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2081   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2082   IS             ip=b->row,iip = b->icol;
2083   PetscErrorCode ierr;
2084   const PetscInt *rip,*riip;
2085   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2086   PetscInt       *ai=a->i,*aj=a->j;
2087   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2088   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2089   PetscBool      perm_identity;
2090   FactorShiftCtx sctx;
2091   PetscReal      rs;
2092   MatScalar      d,*v;
2093 
2094   PetscFunctionBegin;
2095   /* MatPivotSetUp(): initialize shift context sctx */
2096   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2097 
2098   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2099     sctx.shift_top = info->zeropivot;
2100     for (i=0; i<mbs; i++) {
2101       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2102       d  = (aa)[a->diag[i]];
2103       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2104       v  = aa+ai[i];
2105       nz = ai[i+1] - ai[i];
2106       for (j=0; j<nz; j++)
2107 	rs += PetscAbsScalar(v[j]);
2108       if (rs>sctx.shift_top) sctx.shift_top = rs;
2109     }
2110     sctx.shift_top   *= 1.1;
2111     sctx.nshift_max   = 5;
2112     sctx.shift_lo     = 0.;
2113     sctx.shift_hi     = 1.;
2114   }
2115 
2116   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2117   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2118 
2119   /* allocate working arrays
2120      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2121      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2122   */
2123   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2124 
2125   do {
2126     sctx.newshift = PETSC_FALSE;
2127 
2128     for (i=0; i<mbs; i++) c2r[i] = mbs;
2129     il[0] = 0;
2130 
2131     for (k = 0; k<mbs; k++){
2132       /* zero rtmp */
2133       nz = bi[k+1] - bi[k];
2134       bjtmp = bj + bi[k];
2135       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2136 
2137       /* load in initial unfactored row */
2138       bval = ba + bi[k];
2139       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2140       for (j = jmin; j < jmax; j++){
2141         col = riip[aj[j]];
2142         if (col >= k){ /* only take upper triangular entry */
2143           rtmp[col] = aa[j];
2144           *bval++   = 0.0; /* for in-place factorization */
2145         }
2146       }
2147       /* shift the diagonal of the matrix: ZeropivotApply() */
2148       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2149 
2150       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2151       dk = rtmp[k];
2152       i  = c2r[k]; /* first row to be added to k_th row  */
2153 
2154       while (i < k){
2155         nexti = c2r[i]; /* next row to be added to k_th row */
2156 
2157         /* compute multiplier, update diag(k) and U(i,k) */
2158         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2159         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
2160         dk   += uikdi*ba[ili]; /* update diag[k] */
2161         ba[ili] = uikdi; /* -U(i,k) */
2162 
2163         /* add multiple of row i to k-th row */
2164         jmin = ili + 1; jmax = bi[i+1];
2165         if (jmin < jmax){
2166           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2167           /* update il and c2r for row i */
2168           il[i] = jmin;
2169           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2170         }
2171         i = nexti;
2172       }
2173 
2174       /* copy data into U(k,:) */
2175       rs   = 0.0;
2176       jmin = bi[k]; jmax = bi[k+1]-1;
2177       if (jmin < jmax) {
2178         for (j=jmin; j<jmax; j++){
2179           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2180         }
2181         /* add the k-th row into il and c2r */
2182         il[k] = jmin;
2183         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2184       }
2185 
2186       /* MatPivotCheck() */
2187       sctx.rs  = rs;
2188       sctx.pv  = dk;
2189       ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2190       if(sctx.newshift) break;
2191       dk = sctx.pv;
2192 
2193       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2194     }
2195   } while (sctx.newshift);
2196 
2197   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2198   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2199   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2200 
2201   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2202   if (perm_identity){
2203     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2204     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2205     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2206     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2207   } else {
2208     B->ops->solve           = MatSolve_SeqSBAIJ_1;
2209     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
2210     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
2211     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
2212   }
2213 
2214   C->assembled    = PETSC_TRUE;
2215   C->preallocated = PETSC_TRUE;
2216   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2217 
2218   /* MatPivotView() */
2219   if (sctx.nshift){
2220     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2221       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2222     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2223       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2224     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2225       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2226     }
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2233 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2234 {
2235   Mat            C = B;
2236   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2237   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2238   IS             ip=b->row,iip = b->icol;
2239   PetscErrorCode ierr;
2240   const PetscInt *rip,*riip;
2241   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2242   PetscInt       *ai=a->i,*aj=a->j;
2243   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2244   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2245   PetscBool      perm_identity;
2246   FactorShiftCtx sctx;
2247   PetscReal      rs;
2248   MatScalar      d,*v;
2249 
2250   PetscFunctionBegin;
2251   /* MatPivotSetUp(): initialize shift context sctx */
2252   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2253 
2254   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2255     sctx.shift_top = info->zeropivot;
2256     for (i=0; i<mbs; i++) {
2257       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2258       d  = (aa)[a->diag[i]];
2259       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2260       v  = aa+ai[i];
2261       nz = ai[i+1] - ai[i];
2262       for (j=0; j<nz; j++)
2263 	rs += PetscAbsScalar(v[j]);
2264       if (rs>sctx.shift_top) sctx.shift_top = rs;
2265     }
2266     sctx.shift_top   *= 1.1;
2267     sctx.nshift_max   = 5;
2268     sctx.shift_lo     = 0.;
2269     sctx.shift_hi     = 1.;
2270   }
2271 
2272   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2273   ierr  = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2274 
2275   /* initialization */
2276   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2277 
2278   do {
2279     sctx.newshift = PETSC_FALSE;
2280 
2281     for (i=0; i<mbs; i++) jl[i] = mbs;
2282     il[0] = 0;
2283 
2284     for (k = 0; k<mbs; k++){
2285       /* zero rtmp */
2286       nz = bi[k+1] - bi[k];
2287       bjtmp = bj + bi[k];
2288       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2289 
2290       bval = ba + bi[k];
2291       /* initialize k-th row by the perm[k]-th row of A */
2292       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2293       for (j = jmin; j < jmax; j++){
2294         col = riip[aj[j]];
2295         if (col >= k){ /* only take upper triangular entry */
2296           rtmp[col] = aa[j];
2297           *bval++  = 0.0; /* for in-place factorization */
2298         }
2299       }
2300       /* shift the diagonal of the matrix */
2301       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2302 
2303       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2304       dk = rtmp[k];
2305       i = jl[k]; /* first row to be added to k_th row  */
2306 
2307       while (i < k){
2308         nexti = jl[i]; /* next row to be added to k_th row */
2309 
2310         /* compute multiplier, update diag(k) and U(i,k) */
2311         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2312         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
2313         dk += uikdi*ba[ili];
2314         ba[ili] = uikdi; /* -U(i,k) */
2315 
2316         /* add multiple of row i to k-th row */
2317         jmin = ili + 1; jmax = bi[i+1];
2318         if (jmin < jmax){
2319           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2320           /* update il and jl for row i */
2321           il[i] = jmin;
2322           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2323         }
2324         i = nexti;
2325       }
2326 
2327       /* shift the diagonals when zero pivot is detected */
2328       /* compute rs=sum of abs(off-diagonal) */
2329       rs   = 0.0;
2330       jmin = bi[k]+1;
2331       nz   = bi[k+1] - jmin;
2332       bcol = bj + jmin;
2333       for (j=0; j<nz; j++) {
2334         rs += PetscAbsScalar(rtmp[bcol[j]]);
2335       }
2336 
2337       sctx.rs = rs;
2338       sctx.pv = dk;
2339       ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2340       if (sctx.newshift) break;
2341       dk = sctx.pv;
2342 
2343       /* copy data into U(k,:) */
2344       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2345       jmin = bi[k]+1; jmax = bi[k+1];
2346       if (jmin < jmax) {
2347         for (j=jmin; j<jmax; j++){
2348           col = bj[j]; ba[j] = rtmp[col];
2349         }
2350         /* add the k-th row into il and jl */
2351         il[k] = jmin;
2352         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2353       }
2354     }
2355   } while (sctx.newshift);
2356 
2357   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2358   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2359   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2360 
2361   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2362   if (perm_identity){
2363     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2364     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2365     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367   } else {
2368     B->ops->solve           = MatSolve_SeqSBAIJ_1_inplace;
2369     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_inplace;
2370     B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_inplace;
2371     B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_inplace;
2372   }
2373 
2374   C->assembled    = PETSC_TRUE;
2375   C->preallocated = PETSC_TRUE;
2376   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2377   if (sctx.nshift){
2378     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2379       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2380     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2381       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2382     }
2383   }
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 /*
2388    icc() under revised new data structure.
2389    Factored arrays bj and ba are stored as
2390      U(0,:),...,U(i,:),U(n-1,:)
2391 
2392    ui=fact->i is an array of size n+1, in which
2393    ui+
2394      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2395      ui[n]:  points to U(n-1,n-1)+1
2396 
2397   udiag=fact->diag is an array of size n,in which
2398      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2399 
2400    U(i,:) contains udiag[i] as its last entry, i.e.,
2401     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2402 */
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2406 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2407 {
2408   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2409   Mat_SeqSBAIJ       *b;
2410   PetscErrorCode     ierr;
2411   PetscBool          perm_identity,missing;
2412   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2413   const PetscInt     *rip,*riip;
2414   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2415   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2416   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2417   PetscReal          fill=info->fill,levels=info->levels;
2418   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2419   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2420   PetscBT            lnkbt;
2421   IS                 iperm;
2422 
2423   PetscFunctionBegin;
2424   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2425   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2426   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2427   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2428   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2429 
2430   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2431   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2432   ui[0] = 0;
2433 
2434   /* ICC(0) without matrix ordering: simply rearrange column indices */
2435   if (!levels && perm_identity) {
2436     for (i=0; i<am; i++) {
2437       ncols    = ai[i+1] - a->diag[i];
2438       ui[i+1]  = ui[i] + ncols;
2439       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2440     }
2441     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2442     cols = uj;
2443     for (i=0; i<am; i++) {
2444       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2445       ncols = ai[i+1] - a->diag[i] -1;
2446       for (j=0; j<ncols; j++) *cols++ = aj[j];
2447       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2448     }
2449   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2450     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2451     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2452 
2453     /* initialization */
2454     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2455 
2456     /* jl: linked list for storing indices of the pivot rows
2457        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2458     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2459     for (i=0; i<am; i++){
2460       jl[i] = am; il[i] = 0;
2461     }
2462 
2463     /* create and initialize a linked list for storing column indices of the active row k */
2464     nlnk = am + 1;
2465     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2466 
2467     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2468     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2469     current_space = free_space;
2470     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2471     current_space_lvl = free_space_lvl;
2472 
2473     for (k=0; k<am; k++){  /* for each active row k */
2474       /* initialize lnk by the column indices of row rip[k] of A */
2475       nzk   = 0;
2476       ncols = ai[rip[k]+1] - ai[rip[k]];
2477       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2478       ncols_upper = 0;
2479       for (j=0; j<ncols; j++){
2480         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2481         if (riip[i] >= k){ /* only take upper triangular entry */
2482           ajtmp[ncols_upper] = i;
2483           ncols_upper++;
2484         }
2485       }
2486       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2487       nzk += nlnk;
2488 
2489       /* update lnk by computing fill-in for each pivot row to be merged in */
2490       prow = jl[k]; /* 1st pivot row */
2491 
2492       while (prow < k){
2493         nextprow = jl[prow];
2494 
2495         /* merge prow into k-th row */
2496         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2497         jmax = ui[prow+1];
2498         ncols = jmax-jmin;
2499         i     = jmin - ui[prow];
2500         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2501         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2502         j     = *(uj - 1);
2503         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2504         nzk += nlnk;
2505 
2506         /* update il and jl for prow */
2507         if (jmin < jmax){
2508           il[prow] = jmin;
2509           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2510         }
2511         prow = nextprow;
2512       }
2513 
2514       /* if free space is not available, make more free space */
2515       if (current_space->local_remaining<nzk) {
2516         i  = am - k + 1; /* num of unfactored rows */
2517         i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2518         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2519         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2520         reallocs++;
2521       }
2522 
2523       /* copy data into free_space and free_space_lvl, then initialize lnk */
2524       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2525       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2526 
2527       /* add the k-th row into il and jl */
2528       if (nzk > 1){
2529         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2530         jl[k] = jl[i]; jl[i] = k;
2531         il[k] = ui[k] + 1;
2532       }
2533       uj_ptr[k]     = current_space->array;
2534       uj_lvl_ptr[k] = current_space_lvl->array;
2535 
2536       current_space->array           += nzk;
2537       current_space->local_used      += nzk;
2538       current_space->local_remaining -= nzk;
2539 
2540       current_space_lvl->array           += nzk;
2541       current_space_lvl->local_used      += nzk;
2542       current_space_lvl->local_remaining -= nzk;
2543 
2544       ui[k+1] = ui[k] + nzk;
2545     }
2546 
2547     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2548     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2549     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2550     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2551 
2552     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2553     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2554     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2555     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2556     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2557 
2558   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2559 
2560   /* put together the new matrix in MATSEQSBAIJ format */
2561   b    = (Mat_SeqSBAIJ*)(fact)->data;
2562   b->singlemalloc = PETSC_FALSE;
2563   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2564   b->j    = uj;
2565   b->i    = ui;
2566   b->diag = udiag;
2567   b->free_diag = PETSC_TRUE;
2568   b->ilen = 0;
2569   b->imax = 0;
2570   b->row  = perm;
2571   b->col  = perm;
2572   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2573   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2574   b->icol = iperm;
2575   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2576   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2577   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2578   b->maxnz   = b->nz = ui[am];
2579   b->free_a  = PETSC_TRUE;
2580   b->free_ij = PETSC_TRUE;
2581 
2582   fact->info.factor_mallocs   = reallocs;
2583   fact->info.fill_ratio_given = fill;
2584   if (ai[am] != 0) {
2585     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2586     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2587   } else {
2588     fact->info.fill_ratio_needed = 0.0;
2589   }
2590 #if defined(PETSC_USE_INFO)
2591     if (ai[am] != 0) {
2592       PetscReal af = fact->info.fill_ratio_needed;
2593       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2594       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2595       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2596     } else {
2597       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2598     }
2599 #endif
2600   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 #undef __FUNCT__
2605 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2606 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2607 {
2608   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2609   Mat_SeqSBAIJ       *b;
2610   PetscErrorCode     ierr;
2611   PetscBool          perm_identity,missing;
2612   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2613   const PetscInt     *rip,*riip;
2614   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2615   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2616   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2617   PetscReal          fill=info->fill,levels=info->levels;
2618   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2619   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2620   PetscBT            lnkbt;
2621   IS                 iperm;
2622 
2623   PetscFunctionBegin;
2624   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2625   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2626   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2627   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2628   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2629 
2630   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2631   ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2632   ui[0] = 0;
2633 
2634   /* ICC(0) without matrix ordering: simply copies fill pattern */
2635   if (!levels && perm_identity) {
2636 
2637     for (i=0; i<am; i++) {
2638       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2639       udiag[i] = ui[i];
2640     }
2641     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2642     cols = uj;
2643     for (i=0; i<am; i++) {
2644       aj    = a->j + a->diag[i];
2645       ncols = ui[i+1] - ui[i];
2646       for (j=0; j<ncols; j++) *cols++ = *aj++;
2647     }
2648   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2649     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2650     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2651 
2652     /* initialization */
2653     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2654 
2655     /* jl: linked list for storing indices of the pivot rows
2656        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2657     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2658     for (i=0; i<am; i++){
2659       jl[i] = am; il[i] = 0;
2660     }
2661 
2662     /* create and initialize a linked list for storing column indices of the active row k */
2663     nlnk = am + 1;
2664     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2665 
2666     /* initial FreeSpace size is fill*(ai[am]+1) */
2667     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2668     current_space = free_space;
2669     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2670     current_space_lvl = free_space_lvl;
2671 
2672     for (k=0; k<am; k++){  /* for each active row k */
2673       /* initialize lnk by the column indices of row rip[k] of A */
2674       nzk   = 0;
2675       ncols = ai[rip[k]+1] - ai[rip[k]];
2676       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2677       ncols_upper = 0;
2678       for (j=0; j<ncols; j++){
2679         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2680         if (riip[i] >= k){ /* only take upper triangular entry */
2681           ajtmp[ncols_upper] = i;
2682           ncols_upper++;
2683         }
2684       }
2685       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2686       nzk += nlnk;
2687 
2688       /* update lnk by computing fill-in for each pivot row to be merged in */
2689       prow = jl[k]; /* 1st pivot row */
2690 
2691       while (prow < k){
2692         nextprow = jl[prow];
2693 
2694         /* merge prow into k-th row */
2695         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2696         jmax = ui[prow+1];
2697         ncols = jmax-jmin;
2698         i     = jmin - ui[prow];
2699         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2700         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2701         j     = *(uj - 1);
2702         ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2703         nzk += nlnk;
2704 
2705         /* update il and jl for prow */
2706         if (jmin < jmax){
2707           il[prow] = jmin;
2708           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2709         }
2710         prow = nextprow;
2711       }
2712 
2713       /* if free space is not available, make more free space */
2714       if (current_space->local_remaining<nzk) {
2715         i = am - k + 1; /* num of unfactored rows */
2716         i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2717         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2718         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2719         reallocs++;
2720       }
2721 
2722       /* copy data into free_space and free_space_lvl, then initialize lnk */
2723       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2724       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2725 
2726       /* add the k-th row into il and jl */
2727       if (nzk > 1){
2728         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2729         jl[k] = jl[i]; jl[i] = k;
2730         il[k] = ui[k] + 1;
2731       }
2732       uj_ptr[k]     = current_space->array;
2733       uj_lvl_ptr[k] = current_space_lvl->array;
2734 
2735       current_space->array           += nzk;
2736       current_space->local_used      += nzk;
2737       current_space->local_remaining -= nzk;
2738 
2739       current_space_lvl->array           += nzk;
2740       current_space_lvl->local_used      += nzk;
2741       current_space_lvl->local_remaining -= nzk;
2742 
2743       ui[k+1] = ui[k] + nzk;
2744     }
2745 
2746 #if defined(PETSC_USE_INFO)
2747     if (ai[am] != 0) {
2748       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2749       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2750       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2751       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2752     } else {
2753       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2754     }
2755 #endif
2756 
2757     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2758     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2759     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2760     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2761 
2762     /* destroy list of free space and other temporary array(s) */
2763     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2764     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2765     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2766     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2767 
2768   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2769 
2770   /* put together the new matrix in MATSEQSBAIJ format */
2771 
2772   b    = (Mat_SeqSBAIJ*)fact->data;
2773   b->singlemalloc = PETSC_FALSE;
2774   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2775   b->j    = uj;
2776   b->i    = ui;
2777   b->diag = udiag;
2778   b->free_diag = PETSC_TRUE;
2779   b->ilen = 0;
2780   b->imax = 0;
2781   b->row  = perm;
2782   b->col  = perm;
2783   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2784   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2785   b->icol = iperm;
2786   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2787   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2788   ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2789   b->maxnz   = b->nz = ui[am];
2790   b->free_a  = PETSC_TRUE;
2791   b->free_ij = PETSC_TRUE;
2792 
2793   fact->info.factor_mallocs    = reallocs;
2794   fact->info.fill_ratio_given  = fill;
2795   if (ai[am] != 0) {
2796     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2797   } else {
2798     fact->info.fill_ratio_needed = 0.0;
2799   }
2800   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2806 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2807 {
2808   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2809   Mat_SeqSBAIJ       *b;
2810   PetscErrorCode     ierr;
2811   PetscBool          perm_identity;
2812   PetscReal          fill = info->fill;
2813   const PetscInt     *rip,*riip;
2814   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2815   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2816   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2817   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2818   PetscBT            lnkbt;
2819   IS                 iperm;
2820 
2821   PetscFunctionBegin;
2822   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2823   /* check whether perm is the identity mapping */
2824   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2825   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2826   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2827   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2828 
2829   /* initialization */
2830   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2831   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2832   ui[0] = 0;
2833 
2834   /* jl: linked list for storing indices of the pivot rows
2835      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2836   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2837   for (i=0; i<am; i++){
2838     jl[i] = am; il[i] = 0;
2839   }
2840 
2841   /* create and initialize a linked list for storing column indices of the active row k */
2842   nlnk = am + 1;
2843   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2844 
2845   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2846   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2847   current_space = free_space;
2848 
2849   for (k=0; k<am; k++){  /* for each active row k */
2850     /* initialize lnk by the column indices of row rip[k] of A */
2851     nzk   = 0;
2852     ncols = ai[rip[k]+1] - ai[rip[k]];
2853     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2854     ncols_upper = 0;
2855     for (j=0; j<ncols; j++){
2856       i = riip[*(aj + ai[rip[k]] + j)];
2857       if (i >= k){ /* only take upper triangular entry */
2858         cols[ncols_upper] = i;
2859         ncols_upper++;
2860       }
2861     }
2862     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2863     nzk += nlnk;
2864 
2865     /* update lnk by computing fill-in for each pivot row to be merged in */
2866     prow = jl[k]; /* 1st pivot row */
2867 
2868     while (prow < k){
2869       nextprow = jl[prow];
2870       /* merge prow into k-th row */
2871       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2872       jmax = ui[prow+1];
2873       ncols = jmax-jmin;
2874       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2875       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2876       nzk += nlnk;
2877 
2878       /* update il and jl for prow */
2879       if (jmin < jmax){
2880         il[prow] = jmin;
2881         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2882       }
2883       prow = nextprow;
2884     }
2885 
2886     /* if free space is not available, make more free space */
2887     if (current_space->local_remaining<nzk) {
2888       i  = am - k + 1; /* num of unfactored rows */
2889       i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2890       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2891       reallocs++;
2892     }
2893 
2894     /* copy data into free space, then initialize lnk */
2895     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2896 
2897     /* add the k-th row into il and jl */
2898     if (nzk > 1){
2899       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2900       jl[k] = jl[i]; jl[i] = k;
2901       il[k] = ui[k] + 1;
2902     }
2903     ui_ptr[k] = current_space->array;
2904     current_space->array           += nzk;
2905     current_space->local_used      += nzk;
2906     current_space->local_remaining -= nzk;
2907 
2908     ui[k+1] = ui[k] + nzk;
2909   }
2910 
2911   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2912   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2913   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2914 
2915   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2916   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2917   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2918   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2919 
2920   /* put together the new matrix in MATSEQSBAIJ format */
2921 
2922   b = (Mat_SeqSBAIJ*)fact->data;
2923   b->singlemalloc = PETSC_FALSE;
2924   b->free_a       = PETSC_TRUE;
2925   b->free_ij      = PETSC_TRUE;
2926   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2927   b->j    = uj;
2928   b->i    = ui;
2929   b->diag = udiag;
2930   b->free_diag = PETSC_TRUE;
2931   b->ilen = 0;
2932   b->imax = 0;
2933   b->row  = perm;
2934   b->col  = perm;
2935   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2936   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2937   b->icol = iperm;
2938   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2939   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2940   ierr    = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2941   b->maxnz = b->nz = ui[am];
2942 
2943   fact->info.factor_mallocs    = reallocs;
2944   fact->info.fill_ratio_given  = fill;
2945   if (ai[am] != 0) {
2946     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2947     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2948   } else {
2949     fact->info.fill_ratio_needed = 0.0;
2950   }
2951 #if defined(PETSC_USE_INFO)
2952   if (ai[am] != 0) {
2953     PetscReal af = fact->info.fill_ratio_needed;
2954     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2955     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2956     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2957   } else {
2958      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2959   }
2960 #endif
2961   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 #undef __FUNCT__
2966 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2967 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2968 {
2969   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2970   Mat_SeqSBAIJ       *b;
2971   PetscErrorCode     ierr;
2972   PetscBool          perm_identity;
2973   PetscReal          fill = info->fill;
2974   const PetscInt     *rip,*riip;
2975   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2976   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2977   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2978   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2979   PetscBT            lnkbt;
2980   IS                 iperm;
2981 
2982   PetscFunctionBegin;
2983   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2984   /* check whether perm is the identity mapping */
2985   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2986   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2987   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2988   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2989 
2990   /* initialization */
2991   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2992   ui[0] = 0;
2993 
2994   /* jl: linked list for storing indices of the pivot rows
2995      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2996   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2997   for (i=0; i<am; i++){
2998     jl[i] = am; il[i] = 0;
2999   }
3000 
3001   /* create and initialize a linked list for storing column indices of the active row k */
3002   nlnk = am + 1;
3003   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3004 
3005   /* initial FreeSpace size is fill*(ai[am]+1) */
3006   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3007   current_space = free_space;
3008 
3009   for (k=0; k<am; k++){  /* for each active row k */
3010     /* initialize lnk by the column indices of row rip[k] of A */
3011     nzk   = 0;
3012     ncols = ai[rip[k]+1] - ai[rip[k]];
3013     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3014     ncols_upper = 0;
3015     for (j=0; j<ncols; j++){
3016       i = riip[*(aj + ai[rip[k]] + j)];
3017       if (i >= k){ /* only take upper triangular entry */
3018         cols[ncols_upper] = i;
3019         ncols_upper++;
3020       }
3021     }
3022     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3023     nzk += nlnk;
3024 
3025     /* update lnk by computing fill-in for each pivot row to be merged in */
3026     prow = jl[k]; /* 1st pivot row */
3027 
3028     while (prow < k){
3029       nextprow = jl[prow];
3030       /* merge prow into k-th row */
3031       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
3032       jmax = ui[prow+1];
3033       ncols = jmax-jmin;
3034       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3035       ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3036       nzk += nlnk;
3037 
3038       /* update il and jl for prow */
3039       if (jmin < jmax){
3040         il[prow] = jmin;
3041         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3042       }
3043       prow = nextprow;
3044     }
3045 
3046     /* if free space is not available, make more free space */
3047     if (current_space->local_remaining<nzk) {
3048       i = am - k + 1; /* num of unfactored rows */
3049       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3050       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3051       reallocs++;
3052     }
3053 
3054     /* copy data into free space, then initialize lnk */
3055     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3056 
3057     /* add the k-th row into il and jl */
3058     if (nzk-1 > 0){
3059       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3060       jl[k] = jl[i]; jl[i] = k;
3061       il[k] = ui[k] + 1;
3062     }
3063     ui_ptr[k] = current_space->array;
3064     current_space->array           += nzk;
3065     current_space->local_used      += nzk;
3066     current_space->local_remaining -= nzk;
3067 
3068     ui[k+1] = ui[k] + nzk;
3069   }
3070 
3071 #if defined(PETSC_USE_INFO)
3072   if (ai[am] != 0) {
3073     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3074     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3075     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3076     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3077   } else {
3078      ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3079   }
3080 #endif
3081 
3082   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3083   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3084   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3085 
3086   /* destroy list of free space and other temporary array(s) */
3087   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3088   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3089   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3090 
3091   /* put together the new matrix in MATSEQSBAIJ format */
3092 
3093   b = (Mat_SeqSBAIJ*)fact->data;
3094   b->singlemalloc = PETSC_FALSE;
3095   b->free_a       = PETSC_TRUE;
3096   b->free_ij      = PETSC_TRUE;
3097   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3098   b->j    = uj;
3099   b->i    = ui;
3100   b->diag = 0;
3101   b->ilen = 0;
3102   b->imax = 0;
3103   b->row  = perm;
3104   b->col  = perm;
3105   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3106   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3107   b->icol = iperm;
3108   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3109   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3110   ierr    = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3111   b->maxnz = b->nz = ui[am];
3112 
3113   fact->info.factor_mallocs    = reallocs;
3114   fact->info.fill_ratio_given  = fill;
3115   if (ai[am] != 0) {
3116     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3117   } else {
3118     fact->info.fill_ratio_needed = 0.0;
3119   }
3120   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3121   PetscFunctionReturn(0);
3122 }
3123 
3124 #undef __FUNCT__
3125 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3126 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3127 {
3128   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3129   PetscErrorCode    ierr;
3130   PetscInt          n = A->rmap->n;
3131   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3132   PetscScalar       *x,sum;
3133   const PetscScalar *b;
3134   const MatScalar   *aa = a->a,*v;
3135   PetscInt          i,nz;
3136 
3137   PetscFunctionBegin;
3138   if (!n) PetscFunctionReturn(0);
3139 
3140   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3141   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3142 
3143   /* forward solve the lower triangular */
3144   x[0] = b[0];
3145   v    = aa;
3146   vi   = aj;
3147   for (i=1; i<n; i++) {
3148     nz  = ai[i+1] - ai[i];
3149     sum = b[i];
3150     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3151     v  += nz;
3152     vi += nz;
3153     x[i] = sum;
3154   }
3155 
3156   /* backward solve the upper triangular */
3157   for (i=n-1; i>=0; i--){
3158     v   = aa + adiag[i+1] + 1;
3159     vi  = aj + adiag[i+1] + 1;
3160     nz = adiag[i] - adiag[i+1]-1;
3161     sum = x[i];
3162     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3163     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3164   }
3165 
3166   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3167   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3168   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3169   PetscFunctionReturn(0);
3170 }
3171 
3172 #undef __FUNCT__
3173 #define __FUNCT__ "MatSolve_SeqAIJ"
3174 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3175 {
3176   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3177   IS                iscol = a->col,isrow = a->row;
3178   PetscErrorCode    ierr;
3179   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3180   const PetscInt    *rout,*cout,*r,*c;
3181   PetscScalar       *x,*tmp,sum;
3182   const PetscScalar *b;
3183   const MatScalar   *aa = a->a,*v;
3184 
3185   PetscFunctionBegin;
3186   if (!n) PetscFunctionReturn(0);
3187 
3188   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3189   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3190   tmp  = a->solve_work;
3191 
3192   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3193   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3194 
3195   /* forward solve the lower triangular */
3196   tmp[0] = b[r[0]];
3197   v      = aa;
3198   vi     = aj;
3199   for (i=1; i<n; i++) {
3200     nz  = ai[i+1] - ai[i];
3201     sum = b[r[i]];
3202     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3203     tmp[i] = sum;
3204     v += nz; vi += nz;
3205   }
3206 
3207   /* backward solve the upper triangular */
3208   for (i=n-1; i>=0; i--){
3209     v   = aa + adiag[i+1]+1;
3210     vi  = aj + adiag[i+1]+1;
3211     nz  = adiag[i]-adiag[i+1]-1;
3212     sum = tmp[i];
3213     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3214     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3215   }
3216 
3217   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3218   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3219   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3220   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3221   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3227 /*
3228     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3229 */
3230 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3231 {
3232   Mat                B = *fact;
3233   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b;
3234   IS                 isicol;
3235   PetscErrorCode     ierr;
3236   const PetscInt     *r,*ic;
3237   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3238   PetscInt           *bi,*bj,*bdiag,*bdiag_rev;
3239   PetscInt           row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3240   PetscInt           nlnk,*lnk;
3241   PetscBT            lnkbt;
3242   PetscBool          row_identity,icol_identity;
3243   MatScalar          *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3244   const PetscInt     *ics;
3245   PetscInt           j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3246   PetscReal          dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3247   PetscInt           dtcount=(PetscInt)info->dtcount,nnz_max;
3248   PetscBool          missing;
3249 
3250   PetscFunctionBegin;
3251 
3252   if (dt      == PETSC_DEFAULT) dt      = 0.005;
3253   if (dtcol   == PETSC_DEFAULT) dtcol   = 0.01; /* XXX unused! */
3254   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3255 
3256   /* ------- symbolic factorization, can be reused ---------*/
3257   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3258   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3259   adiag=a->diag;
3260 
3261   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3262 
3263   /* bdiag is location of diagonal in factor */
3264   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3265   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3266 
3267   /* allocate row pointers bi */
3268   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3269 
3270   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3271   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3272   nnz_max  = ai[n]+2*n*dtcount+2;
3273 
3274   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3275   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3276 
3277   /* put together the new matrix */
3278   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
3279   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3280   b    = (Mat_SeqAIJ*)B->data;
3281   b->free_a       = PETSC_TRUE;
3282   b->free_ij      = PETSC_TRUE;
3283   b->singlemalloc = PETSC_FALSE;
3284   b->a          = ba;
3285   b->j          = bj;
3286   b->i          = bi;
3287   b->diag       = bdiag;
3288   b->ilen       = 0;
3289   b->imax       = 0;
3290   b->row        = isrow;
3291   b->col        = iscol;
3292   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3293   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3294   b->icol       = isicol;
3295   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3296 
3297   ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3298   b->maxnz = nnz_max;
3299 
3300   B->factortype            = MAT_FACTOR_ILUDT;
3301   B->info.factor_mallocs   = 0;
3302   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3303   CHKMEMQ;
3304   /* ------- end of symbolic factorization ---------*/
3305 
3306   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3307   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3308   ics  = ic;
3309 
3310   /* linked list for storing column indices of the active row */
3311   nlnk = n + 1;
3312   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3313 
3314   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3315   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3316   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3317   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3318   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3319 
3320   bi[0]    = 0;
3321   bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3322   bdiag_rev[n] = bdiag[0];
3323   bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3324   for (i=0; i<n; i++) {
3325     /* copy initial fill into linked list */
3326     nzi = 0; /* nonzeros for active row i */
3327     nzi = ai[r[i]+1] - ai[r[i]];
3328     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3329     nzi_al = adiag[r[i]] - ai[r[i]];
3330     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3331     ajtmp = aj + ai[r[i]];
3332     ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3333 
3334     /* load in initial (unfactored row) */
3335     aatmp = a->a + ai[r[i]];
3336     for (j=0; j<nzi; j++) {
3337       rtmp[ics[*ajtmp++]] = *aatmp++;
3338     }
3339 
3340     /* add pivot rows into linked list */
3341     row = lnk[n];
3342     while (row < i ) {
3343       nzi_bl = bi[row+1] - bi[row] + 1;
3344       bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3345       ierr  = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3346       nzi  += nlnk;
3347       row   = lnk[row];
3348     }
3349 
3350     /* copy data from lnk into jtmp, then initialize lnk */
3351     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3352 
3353     /* numerical factorization */
3354     bjtmp = jtmp;
3355     row   = *bjtmp++; /* 1st pivot row */
3356     while  ( row < i ) {
3357       pc         = rtmp + row;
3358       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3359       multiplier = (*pc) * (*pv);
3360       *pc        = multiplier;
3361       if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3362         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3363         pv         = ba + bdiag[row+1] + 1;
3364         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3365         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3366         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3367         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3368       }
3369       row = *bjtmp++;
3370     }
3371 
3372     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3373     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3374     nzi_bl = 0; j = 0;
3375     while (jtmp[j] < i){ /* Note: jtmp is sorted */
3376       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3377       nzi_bl++; j++;
3378     }
3379     nzi_bu = nzi - nzi_bl -1;
3380     while (j < nzi){
3381       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3382       j++;
3383     }
3384 
3385     bjtmp = bj + bi[i];
3386     batmp = ba + bi[i];
3387     /* apply level dropping rule to L part */
3388     ncut = nzi_al + dtcount;
3389     if (ncut < nzi_bl){
3390       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3391       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3392     } else {
3393       ncut = nzi_bl;
3394     }
3395     for (j=0; j<ncut; j++){
3396       bjtmp[j] = jtmp[j];
3397       batmp[j] = vtmp[j];
3398       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3399     }
3400     bi[i+1] = bi[i] + ncut;
3401     nzi = ncut + 1;
3402 
3403     /* apply level dropping rule to U part */
3404     ncut = nzi_au + dtcount;
3405     if (ncut < nzi_bu){
3406       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3407       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3408     } else {
3409       ncut = nzi_bu;
3410     }
3411     nzi += ncut;
3412 
3413     /* mark bdiagonal */
3414     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3415     bdiag_rev[n-i-1] = bdiag[i+1];
3416     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3417     bjtmp = bj + bdiag[i];
3418     batmp = ba + bdiag[i];
3419     *bjtmp = i;
3420     *batmp = diag_tmp; /* rtmp[i]; */
3421     if (*batmp == 0.0) {
3422       *batmp = dt+shift;
3423       /* printf(" row %d add shift %g\n",i,shift); */
3424     }
3425     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3426     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3427 
3428     bjtmp = bj + bdiag[i+1]+1;
3429     batmp = ba + bdiag[i+1]+1;
3430     for (k=0; k<ncut; k++){
3431       bjtmp[k] = jtmp[nzi_bl+1+k];
3432       batmp[k] = vtmp[nzi_bl+1+k];
3433       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3434     }
3435     /* printf("\n"); */
3436 
3437     im[i]   = nzi; /* used by PetscLLAddSortedLU() */
3438     /*
3439     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3440     printf(" ----------------------------\n");
3441     */
3442   } /* for (i=0; i<n; i++) */
3443   /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3444   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3445 
3446   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3447   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3448 
3449   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3450   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3451   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3452   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3453 
3454   ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3455   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3456 
3457   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3458   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3459   if (row_identity && icol_identity) {
3460     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3461   } else {
3462     B->ops->solve = MatSolve_SeqAIJ;
3463   }
3464 
3465   B->ops->solveadd          = 0;
3466   B->ops->solvetranspose    = 0;
3467   B->ops->solvetransposeadd = 0;
3468   B->ops->matsolve          = 0;
3469   B->assembled              = PETSC_TRUE;
3470   B->preallocated           = PETSC_TRUE;
3471   PetscFunctionReturn(0);
3472 }
3473 
3474 /* a wraper of MatILUDTFactor_SeqAIJ() */
3475 #undef __FUNCT__
3476 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3477 /*
3478     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3479 */
3480 
3481 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3482 {
3483   PetscErrorCode     ierr;
3484 
3485   PetscFunctionBegin;
3486   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3487   PetscFunctionReturn(0);
3488 }
3489 
3490 /*
3491    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3492    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3493 */
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3496 /*
3497     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3498 */
3499 
3500 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3501 {
3502   Mat            C=fact;
3503   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3504   IS             isrow = b->row,isicol = b->icol;
3505   PetscErrorCode ierr;
3506   const PetscInt *r,*ic,*ics;
3507   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3508   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3509   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3510   PetscReal      dt=info->dt,shift=info->shiftamount;
3511   PetscBool      row_identity, col_identity;
3512 
3513   PetscFunctionBegin;
3514   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3515   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3516   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3517   ics  = ic;
3518 
3519   for (i=0; i<n; i++){
3520     /* initialize rtmp array */
3521     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3522     bjtmp = bj + bi[i];
3523     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3524     rtmp[i] = 0.0;
3525     nzu   = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3526     bjtmp = bj + bdiag[i+1] + 1;
3527     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3528 
3529     /* load in initial unfactored row of A */
3530     /* printf("row %d\n",i); */
3531     nz    = ai[r[i]+1] - ai[r[i]];
3532     ajtmp = aj + ai[r[i]];
3533     v     = aa + ai[r[i]];
3534     for (j=0; j<nz; j++) {
3535       rtmp[ics[*ajtmp++]] = v[j];
3536       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3537     }
3538     /* printf("\n"); */
3539 
3540     /* numerical factorization */
3541     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3542     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3543     k = 0;
3544     while (k < nzl){
3545       row   = *bjtmp++;
3546       /* printf("  prow %d\n",row); */
3547       pc         = rtmp + row;
3548       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3549       multiplier = (*pc) * (*pv);
3550       *pc        = multiplier;
3551       if (PetscAbsScalar(multiplier) > dt){
3552         pj         = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3553         pv         = b->a + bdiag[row+1] + 1;
3554         nz         = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3555         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3556         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3557       }
3558       k++;
3559     }
3560 
3561     /* finished row so stick it into b->a */
3562     /* L-part */
3563     pv = b->a + bi[i] ;
3564     pj = bj + bi[i] ;
3565     nzl = bi[i+1] - bi[i];
3566     for (j=0; j<nzl; j++) {
3567       pv[j] = rtmp[pj[j]];
3568       /* printf(" (%d,%g),",pj[j],pv[j]); */
3569     }
3570 
3571     /* diagonal: invert diagonal entries for simplier triangular solves */
3572     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3573     b->a[bdiag[i]] = 1.0/rtmp[i];
3574     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3575 
3576     /* U-part */
3577     pv = b->a + bdiag[i+1] + 1;
3578     pj = bj + bdiag[i+1] + 1;
3579     nzu = bdiag[i] - bdiag[i+1] - 1;
3580     for (j=0; j<nzu; j++) {
3581       pv[j] = rtmp[pj[j]];
3582       /* printf(" (%d,%g),",pj[j],pv[j]); */
3583     }
3584     /* printf("\n"); */
3585   }
3586 
3587   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3588   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3589   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3590 
3591   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3592   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3593   if (row_identity && col_identity) {
3594     C->ops->solve   = MatSolve_SeqAIJ_NaturalOrdering;
3595   } else {
3596     C->ops->solve   = MatSolve_SeqAIJ;
3597   }
3598   C->ops->solveadd           = 0;
3599   C->ops->solvetranspose     = 0;
3600   C->ops->solvetransposeadd  = 0;
3601   C->ops->matsolve           = 0;
3602   C->assembled    = PETSC_TRUE;
3603   C->preallocated = PETSC_TRUE;
3604   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3605   PetscFunctionReturn(0);
3606 }
3607