xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
9 /*
10       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
11 
12       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13 */
14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15 {
16   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
17   PetscErrorCode    ierr;
18   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19   const PetscInt    *ai = a->i, *aj = a->j;
20   const PetscScalar *aa = a->a;
21   PetscBool         *done;
22   PetscReal         best,past = 0,future;
23 
24   PetscFunctionBegin;
25   /* pick initial row */
26   best = -1;
27   for (i=0; i<n; i++) {
28     future = 0.0;
29     for (j=ai[i]; j<ai[i+1]; j++) {
30       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31       else              past  = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best    = past/future;
36       current = i;
37     }
38   }
39 
40   ierr     = PetscMalloc1(n,&done);CHKERRQ(ierr);
41   ierr     = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr     = PetscMalloc1(n,&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best       = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk])       past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best       = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current    = newcurrent;
84     order[i+1] = current;
85   }
86   ierr  = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr  = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr  = PetscFree(done);CHKERRQ(ierr);
90   ierr  = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
96 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
97 {
98   PetscInt       n = A->rmap->n;
99   PetscErrorCode ierr;
100 
101   PetscFunctionBegin;
102 #if defined(PETSC_USE_COMPLEX)
103   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
104 #endif
105   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
106   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
107   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
108     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
109 
110     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
111     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
112 
113     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
114   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
115     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
116     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
117 
118     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
119     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
120   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
121   (*B)->factortype = ftype;
122   PetscFunctionReturn(0);
123 }
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
127 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
128 {
129   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
130   IS                 isicol;
131   PetscErrorCode     ierr;
132   const PetscInt     *r,*ic;
133   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
134   PetscInt           *bi,*bj,*ajtmp;
135   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
136   PetscReal          f;
137   PetscInt           nlnk,*lnk,k,**bi_ptr;
138   PetscFreeSpaceList free_space=NULL,current_space=NULL;
139   PetscBT            lnkbt;
140   PetscBool          missing;
141 
142   PetscFunctionBegin;
143   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
144   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
145   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
146 
147   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
148   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
149   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
150 
151   /* get new row pointers */
152   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
153   bi[0] = 0;
154 
155   /* bdiag is location of diagonal in factor */
156   ierr     = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
157   bdiag[0] = 0;
158 
159   /* linked list for storing column indices of the active row */
160   nlnk = n + 1;
161   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
162 
163   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
164 
165   /* initial FreeSpace size is f*(ai[n]+1) */
166   f             = info->fill;
167   ierr          = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
168   current_space = free_space;
169 
170   for (i=0; i<n; i++) {
171     /* copy previous fill into linked list */
172     nzi = 0;
173     nnz = ai[r[i]+1] - ai[r[i]];
174     ajtmp = aj + ai[r[i]];
175     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
176     nzi  += nlnk;
177 
178     /* add pivot rows into linked list */
179     row = lnk[n];
180     while (row < i) {
181       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
182       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
183       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
184       nzi  += nlnk;
185       row   = lnk[row];
186     }
187     bi[i+1] = bi[i] + nzi;
188     im[i]   = nzi;
189 
190     /* mark bdiag */
191     nzbd = 0;
192     nnz  = nzi;
193     k    = lnk[n];
194     while (nnz-- && k < i) {
195       nzbd++;
196       k = lnk[k];
197     }
198     bdiag[i] = bi[i] + nzbd;
199 
200     /* if free space is not available, make more free space */
201     if (current_space->local_remaining<nzi) {
202       nnz  = (n - i)*nzi; /* estimated and max additional space needed */
203       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
204       reallocs++;
205     }
206 
207     /* copy data into free space, then initialize lnk */
208     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
209 
210     bi_ptr[i]                       = current_space->array;
211     current_space->array           += nzi;
212     current_space->local_used      += nzi;
213     current_space->local_remaining -= nzi;
214   }
215 #if defined(PETSC_USE_INFO)
216   if (ai[n] != 0) {
217     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
218     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
219     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
220     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
221     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
222   } else {
223     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
224   }
225 #endif
226 
227   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
228   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
229 
230   /* destroy list of free space and other temporary array(s) */
231   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
232   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
233   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
234   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
235 
236   /* put together the new matrix */
237   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
238   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
239   b    = (Mat_SeqAIJ*)(B)->data;
240 
241   b->free_a       = PETSC_TRUE;
242   b->free_ij      = PETSC_TRUE;
243   b->singlemalloc = PETSC_FALSE;
244 
245   ierr    = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr);
246   b->j    = bj;
247   b->i    = bi;
248   b->diag = bdiag;
249   b->ilen = 0;
250   b->imax = 0;
251   b->row  = isrow;
252   b->col  = iscol;
253   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
254   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
255   b->icol = isicol;
256   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
257 
258   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
259   ierr     = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
260   b->maxnz = b->nz = bi[n];
261 
262   (B)->factortype            = MAT_FACTOR_LU;
263   (B)->info.factor_mallocs   = reallocs;
264   (B)->info.fill_ratio_given = f;
265 
266   if (ai[n]) {
267     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
268   } else {
269     (B)->info.fill_ratio_needed = 0.0;
270   }
271   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
272   if (a->inode.size) {
273     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
280 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
281 {
282   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
283   IS                 isicol;
284   PetscErrorCode     ierr;
285   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
286   PetscInt           i,n=A->rmap->n;
287   PetscInt           *bi,*bj;
288   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
289   PetscReal          f;
290   PetscInt           nlnk,*lnk,k,**bi_ptr;
291   PetscFreeSpaceList free_space=NULL,current_space=NULL;
292   PetscBT            lnkbt;
293   PetscBool          missing;
294 
295   PetscFunctionBegin;
296   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
297   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
298   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
299 
300   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
301   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
302   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
303 
304   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
305   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
306   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
307   bi[0] = bdiag[0] = 0;
308 
309   /* linked list for storing column indices of the active row */
310   nlnk = n + 1;
311   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
312 
313   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
314 
315   /* initial FreeSpace size is f*(ai[n]+1) */
316   f             = info->fill;
317   ierr          = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
318   current_space = free_space;
319 
320   for (i=0; i<n; i++) {
321     /* copy previous fill into linked list */
322     nzi = 0;
323     nnz = ai[r[i]+1] - ai[r[i]];
324     ajtmp = aj + ai[r[i]];
325     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
326     nzi  += nlnk;
327 
328     /* add pivot rows into linked list */
329     row = lnk[n];
330     while (row < i) {
331       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
332       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
333       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
334       nzi  += nlnk;
335       row   = lnk[row];
336     }
337     bi[i+1] = bi[i] + nzi;
338     im[i]   = nzi;
339 
340     /* mark bdiag */
341     nzbd = 0;
342     nnz  = nzi;
343     k    = lnk[n];
344     while (nnz-- && k < i) {
345       nzbd++;
346       k = lnk[k];
347     }
348     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
349 
350     /* if free space is not available, make more free space */
351     if (current_space->local_remaining<nzi) {
352       nnz  = 2*(n - i)*nzi; /* estimated and max additional space needed */
353       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
354       reallocs++;
355     }
356 
357     /* copy data into free space, then initialize lnk */
358     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
359 
360     bi_ptr[i]                       = current_space->array;
361     current_space->array           += nzi;
362     current_space->local_used      += nzi;
363     current_space->local_remaining -= nzi;
364   }
365 
366   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
367   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
368 
369   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
370   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
371   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
372   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
373   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
374 
375   /* put together the new matrix */
376   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
377   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
378   b    = (Mat_SeqAIJ*)(B)->data;
379 
380   b->free_a       = PETSC_TRUE;
381   b->free_ij      = PETSC_TRUE;
382   b->singlemalloc = PETSC_FALSE;
383 
384   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
385 
386   b->j    = bj;
387   b->i    = bi;
388   b->diag = bdiag;
389   b->ilen = 0;
390   b->imax = 0;
391   b->row  = isrow;
392   b->col  = iscol;
393   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
394   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
395   b->icol = isicol;
396   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
397 
398   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
399   ierr     = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
400   b->maxnz = b->nz = bdiag[0]+1;
401 
402   B->factortype            = MAT_FACTOR_LU;
403   B->info.factor_mallocs   = reallocs;
404   B->info.fill_ratio_given = f;
405 
406   if (ai[n]) {
407     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
408   } else {
409     B->info.fill_ratio_needed = 0.0;
410   }
411 #if defined(PETSC_USE_INFO)
412   if (ai[n] != 0) {
413     PetscReal af = B->info.fill_ratio_needed;
414     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
415     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
416     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
417     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
418   } else {
419     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
420   }
421 #endif
422   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
423   if (a->inode.size) {
424     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
425   }
426   ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr);
427   PetscFunctionReturn(0);
428 }
429 
430 /*
431     Trouble in factorization, should we dump the original matrix?
432 */
433 #undef __FUNCT__
434 #define __FUNCT__ "MatFactorDumpMatrix"
435 PetscErrorCode MatFactorDumpMatrix(Mat A)
436 {
437   PetscErrorCode ierr;
438   PetscBool      flg = PETSC_FALSE;
439 
440   PetscFunctionBegin;
441   ierr = PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr);
442   if (flg) {
443     PetscViewer viewer;
444     char        filename[PETSC_MAX_PATH_LEN];
445 
446     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
447     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
448     ierr = MatView(A,viewer);CHKERRQ(ierr);
449     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
450   }
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
456 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
457 {
458   Mat             C     =B;
459   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
460   IS              isrow = b->row,isicol = b->icol;
461   PetscErrorCode  ierr;
462   const PetscInt  *r,*ic,*ics;
463   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
464   PetscInt        i,j,k,nz,nzL,row,*pj;
465   const PetscInt  *ajtmp,*bjtmp;
466   MatScalar       *rtmp,*pc,multiplier,*pv;
467   const MatScalar *aa=a->a,*v;
468   PetscBool       row_identity,col_identity;
469   FactorShiftCtx  sctx;
470   const PetscInt  *ddiag;
471   PetscReal       rs;
472   MatScalar       d;
473 
474   PetscFunctionBegin;
475   /* MatPivotSetUp(): initialize shift context sctx */
476   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
477 
478   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
479     ddiag          = a->diag;
480     sctx.shift_top = info->zeropivot;
481     for (i=0; i<n; i++) {
482       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
483       d  = (aa)[ddiag[i]];
484       rs = -PetscAbsScalar(d) - PetscRealPart(d);
485       v  = aa+ai[i];
486       nz = ai[i+1] - ai[i];
487       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
488       if (rs>sctx.shift_top) sctx.shift_top = rs;
489     }
490     sctx.shift_top *= 1.1;
491     sctx.nshift_max = 5;
492     sctx.shift_lo   = 0.;
493     sctx.shift_hi   = 1.;
494   }
495 
496   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
497   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
498   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
499   ics  = ic;
500 
501   do {
502     sctx.newshift = PETSC_FALSE;
503     for (i=0; i<n; i++) {
504       /* zero rtmp */
505       /* L part */
506       nz    = bi[i+1] - bi[i];
507       bjtmp = bj + bi[i];
508       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
509 
510       /* U part */
511       nz    = bdiag[i]-bdiag[i+1];
512       bjtmp = bj + bdiag[i+1]+1;
513       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
514 
515       /* load in initial (unfactored row) */
516       nz    = ai[r[i]+1] - ai[r[i]];
517       ajtmp = aj + ai[r[i]];
518       v     = aa + ai[r[i]];
519       for (j=0; j<nz; j++) {
520         rtmp[ics[ajtmp[j]]] = v[j];
521       }
522       /* ZeropivotApply() */
523       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
524 
525       /* elimination */
526       bjtmp = bj + bi[i];
527       row   = *bjtmp++;
528       nzL   = bi[i+1] - bi[i];
529       for (k=0; k < nzL; k++) {
530         pc = rtmp + row;
531         if (*pc != 0.0) {
532           pv         = b->a + bdiag[row];
533           multiplier = *pc * (*pv);
534           *pc        = multiplier;
535 
536           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
537           pv = b->a + bdiag[row+1]+1;
538           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
539 
540           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
541           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
542         }
543         row = *bjtmp++;
544       }
545 
546       /* finished row so stick it into b->a */
547       rs = 0.0;
548       /* L part */
549       pv = b->a + bi[i];
550       pj = b->j + bi[i];
551       nz = bi[i+1] - bi[i];
552       for (j=0; j<nz; j++) {
553         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
554       }
555 
556       /* U part */
557       pv = b->a + bdiag[i+1]+1;
558       pj = b->j + bdiag[i+1]+1;
559       nz = bdiag[i] - bdiag[i+1]-1;
560       for (j=0; j<nz; j++) {
561         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
562       }
563 
564       sctx.rs = rs;
565       sctx.pv = rtmp[i];
566       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
567       if (sctx.newshift) break; /* break for-loop */
568       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
569 
570       /* Mark diagonal and invert diagonal for simplier triangular solves */
571       pv  = b->a + bdiag[i];
572       *pv = 1.0/rtmp[i];
573 
574     } /* endof for (i=0; i<n; i++) { */
575 
576     /* MatPivotRefine() */
577     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
578       /*
579        * if no shift in this attempt & shifting & started shifting & can refine,
580        * then try lower shift
581        */
582       sctx.shift_hi       = sctx.shift_fraction;
583       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
584       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
585       sctx.newshift       = PETSC_TRUE;
586       sctx.nshift++;
587     }
588   } while (sctx.newshift);
589 
590   ierr = PetscFree(rtmp);CHKERRQ(ierr);
591   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
593 
594   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
595   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
596   if (b->inode.size) {
597     C->ops->solve = MatSolve_SeqAIJ_Inode;
598   } else if (row_identity && col_identity) {
599     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
600   } else {
601     C->ops->solve = MatSolve_SeqAIJ;
602   }
603   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
604   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
605   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
606   C->ops->matsolve          = MatMatSolve_SeqAIJ;
607   C->assembled              = PETSC_TRUE;
608   C->preallocated           = PETSC_TRUE;
609 
610   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
611 
612   /* MatShiftView(A,info,&sctx) */
613   if (sctx.nshift) {
614     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
615       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
616     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
617       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
618     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
619       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
620     }
621   }
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
628 {
629   Mat             C     =B;
630   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
631   IS              isrow = b->row,isicol = b->icol;
632   PetscErrorCode  ierr;
633   const PetscInt  *r,*ic,*ics;
634   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
635   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
636   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
637   MatScalar       *pv,*rtmp,*pc,multiplier,d;
638   const MatScalar *v,*aa=a->a;
639   PetscReal       rs=0.0;
640   FactorShiftCtx  sctx;
641   const PetscInt  *ddiag;
642   PetscBool       row_identity, col_identity;
643 
644   PetscFunctionBegin;
645   /* MatPivotSetUp(): initialize shift context sctx */
646   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
647 
648   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
649     ddiag          = a->diag;
650     sctx.shift_top = info->zeropivot;
651     for (i=0; i<n; i++) {
652       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
653       d  = (aa)[ddiag[i]];
654       rs = -PetscAbsScalar(d) - PetscRealPart(d);
655       v  = aa+ai[i];
656       nz = ai[i+1] - ai[i];
657       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
658       if (rs>sctx.shift_top) sctx.shift_top = rs;
659     }
660     sctx.shift_top *= 1.1;
661     sctx.nshift_max = 5;
662     sctx.shift_lo   = 0.;
663     sctx.shift_hi   = 1.;
664   }
665 
666   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
667   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
668   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
669   ics  = ic;
670 
671   do {
672     sctx.newshift = PETSC_FALSE;
673     for (i=0; i<n; i++) {
674       nz    = bi[i+1] - bi[i];
675       bjtmp = bj + bi[i];
676       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
677 
678       /* load in initial (unfactored row) */
679       nz    = ai[r[i]+1] - ai[r[i]];
680       ajtmp = aj + ai[r[i]];
681       v     = aa + ai[r[i]];
682       for (j=0; j<nz; j++) {
683         rtmp[ics[ajtmp[j]]] = v[j];
684       }
685       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
686 
687       row = *bjtmp++;
688       while  (row < i) {
689         pc = rtmp + row;
690         if (*pc != 0.0) {
691           pv         = b->a + diag_offset[row];
692           pj         = b->j + diag_offset[row] + 1;
693           multiplier = *pc / *pv++;
694           *pc        = multiplier;
695           nz         = bi[row+1] - diag_offset[row] - 1;
696           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
697           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
698         }
699         row = *bjtmp++;
700       }
701       /* finished row so stick it into b->a */
702       pv   = b->a + bi[i];
703       pj   = b->j + bi[i];
704       nz   = bi[i+1] - bi[i];
705       diag = diag_offset[i] - bi[i];
706       rs   = 0.0;
707       for (j=0; j<nz; j++) {
708         pv[j] = rtmp[pj[j]];
709         rs   += PetscAbsScalar(pv[j]);
710       }
711       rs -= PetscAbsScalar(pv[diag]);
712 
713       sctx.rs = rs;
714       sctx.pv = pv[diag];
715       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
716       if (sctx.newshift) break;
717       pv[diag] = sctx.pv;
718     }
719 
720     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
721       /*
722        * if no shift in this attempt & shifting & started shifting & can refine,
723        * then try lower shift
724        */
725       sctx.shift_hi       = sctx.shift_fraction;
726       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
727       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
728       sctx.newshift       = PETSC_TRUE;
729       sctx.nshift++;
730     }
731   } while (sctx.newshift);
732 
733   /* invert diagonal entries for simplier triangular solves */
734   for (i=0; i<n; i++) {
735     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
736   }
737   ierr = PetscFree(rtmp);CHKERRQ(ierr);
738   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
739   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
740 
741   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
742   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
743   if (row_identity && col_identity) {
744     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745   } else {
746     C->ops->solve = MatSolve_SeqAIJ_inplace;
747   }
748   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
749   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
750   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
751   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
752 
753   C->assembled    = PETSC_TRUE;
754   C->preallocated = PETSC_TRUE;
755 
756   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
757   if (sctx.nshift) {
758     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
759       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
760     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
761       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
762     }
763   }
764   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
765   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
766 
767   ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 /*
772    This routine implements inplace ILU(0) with row or/and column permutations.
773    Input:
774      A - original matrix
775    Output;
776      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
777          a->j (col index) is permuted by the inverse of colperm, then sorted
778          a->a reordered accordingly with a->j
779          a->diag (ptr to diagonal elements) is updated.
780 */
781 #undef __FUNCT__
782 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
784 {
785   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
786   IS              isrow = a->row,isicol = a->icol;
787   PetscErrorCode  ierr;
788   const PetscInt  *r,*ic,*ics;
789   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
790   PetscInt        *ajtmp,nz,row;
791   PetscInt        *diag = a->diag,nbdiag,*pj;
792   PetscScalar     *rtmp,*pc,multiplier,d;
793   MatScalar       *pv,*v;
794   PetscReal       rs;
795   FactorShiftCtx  sctx;
796   const MatScalar *aa=a->a,*vtmp;
797 
798   PetscFunctionBegin;
799   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
800 
801   /* MatPivotSetUp(): initialize shift context sctx */
802   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
803 
804   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
805     const PetscInt *ddiag = a->diag;
806     sctx.shift_top = info->zeropivot;
807     for (i=0; i<n; i++) {
808       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
809       d    = (aa)[ddiag[i]];
810       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
811       vtmp = aa+ai[i];
812       nz   = ai[i+1] - ai[i];
813       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
814       if (rs>sctx.shift_top) sctx.shift_top = rs;
815     }
816     sctx.shift_top *= 1.1;
817     sctx.nshift_max = 5;
818     sctx.shift_lo   = 0.;
819     sctx.shift_hi   = 1.;
820   }
821 
822   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
823   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
824   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
825   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
826   ics  = ic;
827 
828 #if defined(MV)
829   sctx.shift_top      = 0.;
830   sctx.nshift_max     = 0;
831   sctx.shift_lo       = 0.;
832   sctx.shift_hi       = 0.;
833   sctx.shift_fraction = 0.;
834 
835   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
836     sctx.shift_top = 0.;
837     for (i=0; i<n; i++) {
838       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
839       d  = (a->a)[diag[i]];
840       rs = -PetscAbsScalar(d) - PetscRealPart(d);
841       v  = a->a+ai[i];
842       nz = ai[i+1] - ai[i];
843       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
844       if (rs>sctx.shift_top) sctx.shift_top = rs;
845     }
846     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
847     sctx.shift_top *= 1.1;
848     sctx.nshift_max = 5;
849     sctx.shift_lo   = 0.;
850     sctx.shift_hi   = 1.;
851   }
852 
853   sctx.shift_amount = 0.;
854   sctx.nshift       = 0;
855 #endif
856 
857   do {
858     sctx.newshift = PETSC_FALSE;
859     for (i=0; i<n; i++) {
860       /* load in initial unfactored row */
861       nz    = ai[r[i]+1] - ai[r[i]];
862       ajtmp = aj + ai[r[i]];
863       v     = a->a + ai[r[i]];
864       /* sort permuted ajtmp and values v accordingly */
865       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
866       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
867 
868       diag[r[i]] = ai[r[i]];
869       for (j=0; j<nz; j++) {
870         rtmp[ajtmp[j]] = v[j];
871         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
872       }
873       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
874 
875       row = *ajtmp++;
876       while  (row < i) {
877         pc = rtmp + row;
878         if (*pc != 0.0) {
879           pv = a->a + diag[r[row]];
880           pj = aj + diag[r[row]] + 1;
881 
882           multiplier = *pc / *pv++;
883           *pc        = multiplier;
884           nz         = ai[r[row]+1] - diag[r[row]] - 1;
885           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
886           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
887         }
888         row = *ajtmp++;
889       }
890       /* finished row so overwrite it onto a->a */
891       pv     = a->a + ai[r[i]];
892       pj     = aj + ai[r[i]];
893       nz     = ai[r[i]+1] - ai[r[i]];
894       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
895 
896       rs = 0.0;
897       for (j=0; j<nz; j++) {
898         pv[j] = rtmp[pj[j]];
899         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
900       }
901 
902       sctx.rs = rs;
903       sctx.pv = pv[nbdiag];
904       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
905       if (sctx.newshift) break;
906       pv[nbdiag] = sctx.pv;
907     }
908 
909     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
910       /*
911        * if no shift in this attempt & shifting & started shifting & can refine,
912        * then try lower shift
913        */
914       sctx.shift_hi       = sctx.shift_fraction;
915       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
916       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
917       sctx.newshift       = PETSC_TRUE;
918       sctx.nshift++;
919     }
920   } while (sctx.newshift);
921 
922   /* invert diagonal entries for simplier triangular solves */
923   for (i=0; i<n; i++) {
924     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
925   }
926 
927   ierr = PetscFree(rtmp);CHKERRQ(ierr);
928   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
929   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
930 
931   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
932   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
933   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
934   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
935 
936   A->assembled    = PETSC_TRUE;
937   A->preallocated = PETSC_TRUE;
938 
939   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
940   if (sctx.nshift) {
941     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
942       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
943     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
944       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
945     }
946   }
947   PetscFunctionReturn(0);
948 }
949 
950 /* ----------------------------------------------------------- */
951 #undef __FUNCT__
952 #define __FUNCT__ "MatLUFactor_SeqAIJ"
953 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
954 {
955   PetscErrorCode ierr;
956   Mat            C;
957 
958   PetscFunctionBegin;
959   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
960   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
961   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
962 
963   A->ops->solve          = C->ops->solve;
964   A->ops->solvetranspose = C->ops->solvetranspose;
965 
966   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
967   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 /* ----------------------------------------------------------- */
971 
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
975 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
976 {
977   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
978   IS                iscol = a->col,isrow = a->row;
979   PetscErrorCode    ierr;
980   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
981   PetscInt          nz;
982   const PetscInt    *rout,*cout,*r,*c;
983   PetscScalar       *x,*tmp,*tmps,sum;
984   const PetscScalar *b;
985   const MatScalar   *aa = a->a,*v;
986 
987   PetscFunctionBegin;
988   if (!n) PetscFunctionReturn(0);
989 
990   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
991   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
992   tmp  = a->solve_work;
993 
994   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
995   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
996 
997   /* forward solve the lower triangular */
998   tmp[0] = b[*r++];
999   tmps   = tmp;
1000   for (i=1; i<n; i++) {
1001     v   = aa + ai[i];
1002     vi  = aj + ai[i];
1003     nz  = a->diag[i] - ai[i];
1004     sum = b[*r++];
1005     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1006     tmp[i] = sum;
1007   }
1008 
1009   /* backward solve the upper triangular */
1010   for (i=n-1; i>=0; i--) {
1011     v   = aa + a->diag[i] + 1;
1012     vi  = aj + a->diag[i] + 1;
1013     nz  = ai[i+1] - a->diag[i] - 1;
1014     sum = tmp[i];
1015     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1016     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1017   }
1018 
1019   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1020   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1021   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1022   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1023   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1029 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1030 {
1031   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1032   IS              iscol = a->col,isrow = a->row;
1033   PetscErrorCode  ierr;
1034   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1035   PetscInt        nz,neq;
1036   const PetscInt  *rout,*cout,*r,*c;
1037   PetscScalar     *x,*b,*tmp,*tmps,sum;
1038   const MatScalar *aa = a->a,*v;
1039   PetscBool       bisdense,xisdense;
1040 
1041   PetscFunctionBegin;
1042   if (!n) PetscFunctionReturn(0);
1043 
1044   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1045   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1046   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1047   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1048 
1049   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1050   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1051 
1052   tmp  = a->solve_work;
1053   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1054   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1055 
1056   for (neq=0; neq<B->cmap->n; neq++) {
1057     /* forward solve the lower triangular */
1058     tmp[0] = b[r[0]];
1059     tmps   = tmp;
1060     for (i=1; i<n; i++) {
1061       v   = aa + ai[i];
1062       vi  = aj + ai[i];
1063       nz  = a->diag[i] - ai[i];
1064       sum = b[r[i]];
1065       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1066       tmp[i] = sum;
1067     }
1068     /* backward solve the upper triangular */
1069     for (i=n-1; i>=0; i--) {
1070       v   = aa + a->diag[i] + 1;
1071       vi  = aj + a->diag[i] + 1;
1072       nz  = ai[i+1] - a->diag[i] - 1;
1073       sum = tmp[i];
1074       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1075       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1076     }
1077 
1078     b += n;
1079     x += n;
1080   }
1081   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1082   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1083   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1084   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1085   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1091 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1092 {
1093   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1094   IS              iscol = a->col,isrow = a->row;
1095   PetscErrorCode  ierr;
1096   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1097   PetscInt        nz,neq;
1098   const PetscInt  *rout,*cout,*r,*c;
1099   PetscScalar     *x,*b,*tmp,sum;
1100   const MatScalar *aa = a->a,*v;
1101   PetscBool       bisdense,xisdense;
1102 
1103   PetscFunctionBegin;
1104   if (!n) PetscFunctionReturn(0);
1105 
1106   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1107   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1108   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1109   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1110 
1111   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1112   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1113 
1114   tmp  = a->solve_work;
1115   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1116   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1117 
1118   for (neq=0; neq<B->cmap->n; neq++) {
1119     /* forward solve the lower triangular */
1120     tmp[0] = b[r[0]];
1121     v      = aa;
1122     vi     = aj;
1123     for (i=1; i<n; i++) {
1124       nz  = ai[i+1] - ai[i];
1125       sum = b[r[i]];
1126       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1127       tmp[i] = sum;
1128       v     += nz; vi += nz;
1129     }
1130 
1131     /* backward solve the upper triangular */
1132     for (i=n-1; i>=0; i--) {
1133       v   = aa + adiag[i+1]+1;
1134       vi  = aj + adiag[i+1]+1;
1135       nz  = adiag[i]-adiag[i+1]-1;
1136       sum = tmp[i];
1137       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1138       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1139     }
1140 
1141     b += n;
1142     x += n;
1143   }
1144   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1145   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1146   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1147   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1148   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1154 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1155 {
1156   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1157   IS              iscol = a->col,isrow = a->row;
1158   PetscErrorCode  ierr;
1159   const PetscInt  *r,*c,*rout,*cout;
1160   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1161   PetscInt        nz,row;
1162   PetscScalar     *x,*b,*tmp,*tmps,sum;
1163   const MatScalar *aa = a->a,*v;
1164 
1165   PetscFunctionBegin;
1166   if (!n) PetscFunctionReturn(0);
1167 
1168   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1169   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1170   tmp  = a->solve_work;
1171 
1172   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1173   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1174 
1175   /* forward solve the lower triangular */
1176   tmp[0] = b[*r++];
1177   tmps   = tmp;
1178   for (row=1; row<n; row++) {
1179     i   = rout[row]; /* permuted row */
1180     v   = aa + ai[i];
1181     vi  = aj + ai[i];
1182     nz  = a->diag[i] - ai[i];
1183     sum = b[*r++];
1184     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1185     tmp[row] = sum;
1186   }
1187 
1188   /* backward solve the upper triangular */
1189   for (row=n-1; row>=0; row--) {
1190     i   = rout[row]; /* permuted row */
1191     v   = aa + a->diag[i] + 1;
1192     vi  = aj + a->diag[i] + 1;
1193     nz  = ai[i+1] - a->diag[i] - 1;
1194     sum = tmp[row];
1195     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1196     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1197   }
1198 
1199   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1200   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1201   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1202   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1203   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 /* ----------------------------------------------------------- */
1208 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1211 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1212 {
1213   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1214   PetscErrorCode    ierr;
1215   PetscInt          n   = A->rmap->n;
1216   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1217   PetscScalar       *x;
1218   const PetscScalar *b;
1219   const MatScalar   *aa = a->a;
1220 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1221   PetscInt        adiag_i,i,nz,ai_i;
1222   const PetscInt  *vi;
1223   const MatScalar *v;
1224   PetscScalar     sum;
1225 #endif
1226 
1227   PetscFunctionBegin;
1228   if (!n) PetscFunctionReturn(0);
1229 
1230   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1231   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1232 
1233 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1234   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1235 #else
1236   /* forward solve the lower triangular */
1237   x[0] = b[0];
1238   for (i=1; i<n; i++) {
1239     ai_i = ai[i];
1240     v    = aa + ai_i;
1241     vi   = aj + ai_i;
1242     nz   = adiag[i] - ai_i;
1243     sum  = b[i];
1244     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1245     x[i] = sum;
1246   }
1247 
1248   /* backward solve the upper triangular */
1249   for (i=n-1; i>=0; i--) {
1250     adiag_i = adiag[i];
1251     v       = aa + adiag_i + 1;
1252     vi      = aj + adiag_i + 1;
1253     nz      = ai[i+1] - adiag_i - 1;
1254     sum     = x[i];
1255     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1256     x[i] = sum*aa[adiag_i];
1257   }
1258 #endif
1259   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1260   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1261   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1267 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1268 {
1269   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1270   IS                iscol = a->col,isrow = a->row;
1271   PetscErrorCode    ierr;
1272   PetscInt          i, n = A->rmap->n,j;
1273   PetscInt          nz;
1274   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1275   PetscScalar       *x,*tmp,sum;
1276   const PetscScalar *b;
1277   const MatScalar   *aa = a->a,*v;
1278 
1279   PetscFunctionBegin;
1280   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1281 
1282   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1283   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1284   tmp  = a->solve_work;
1285 
1286   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1287   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1288 
1289   /* forward solve the lower triangular */
1290   tmp[0] = b[*r++];
1291   for (i=1; i<n; i++) {
1292     v   = aa + ai[i];
1293     vi  = aj + ai[i];
1294     nz  = a->diag[i] - ai[i];
1295     sum = b[*r++];
1296     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1297     tmp[i] = sum;
1298   }
1299 
1300   /* backward solve the upper triangular */
1301   for (i=n-1; i>=0; i--) {
1302     v   = aa + a->diag[i] + 1;
1303     vi  = aj + a->diag[i] + 1;
1304     nz  = ai[i+1] - a->diag[i] - 1;
1305     sum = tmp[i];
1306     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1307     tmp[i]   = sum*aa[a->diag[i]];
1308     x[*c--] += tmp[i];
1309   }
1310 
1311   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1312   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1313   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1314   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1315   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1321 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1322 {
1323   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1324   IS                iscol = a->col,isrow = a->row;
1325   PetscErrorCode    ierr;
1326   PetscInt          i, n = A->rmap->n,j;
1327   PetscInt          nz;
1328   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1329   PetscScalar       *x,*tmp,sum;
1330   const PetscScalar *b;
1331   const MatScalar   *aa = a->a,*v;
1332 
1333   PetscFunctionBegin;
1334   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1335 
1336   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1337   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1338   tmp  = a->solve_work;
1339 
1340   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1341   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1342 
1343   /* forward solve the lower triangular */
1344   tmp[0] = b[r[0]];
1345   v      = aa;
1346   vi     = aj;
1347   for (i=1; i<n; i++) {
1348     nz  = ai[i+1] - ai[i];
1349     sum = b[r[i]];
1350     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1351     tmp[i] = sum;
1352     v     += nz;
1353     vi    += nz;
1354   }
1355 
1356   /* backward solve the upper triangular */
1357   v  = aa + adiag[n-1];
1358   vi = aj + adiag[n-1];
1359   for (i=n-1; i>=0; i--) {
1360     nz  = adiag[i] - adiag[i+1] - 1;
1361     sum = tmp[i];
1362     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1363     tmp[i]   = sum*v[nz];
1364     x[c[i]] += tmp[i];
1365     v       += nz+1; vi += nz+1;
1366   }
1367 
1368   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1369   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1370   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1371   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1372   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1378 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1379 {
1380   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1381   IS                iscol = a->col,isrow = a->row;
1382   PetscErrorCode    ierr;
1383   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1384   PetscInt          i,n = A->rmap->n,j;
1385   PetscInt          nz;
1386   PetscScalar       *x,*tmp,s1;
1387   const MatScalar   *aa = a->a,*v;
1388   const PetscScalar *b;
1389 
1390   PetscFunctionBegin;
1391   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1392   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1393   tmp  = a->solve_work;
1394 
1395   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1396   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1397 
1398   /* copy the b into temp work space according to permutation */
1399   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1400 
1401   /* forward solve the U^T */
1402   for (i=0; i<n; i++) {
1403     v   = aa + diag[i];
1404     vi  = aj + diag[i] + 1;
1405     nz  = ai[i+1] - diag[i] - 1;
1406     s1  = tmp[i];
1407     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1408     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1409     tmp[i] = s1;
1410   }
1411 
1412   /* backward solve the L^T */
1413   for (i=n-1; i>=0; i--) {
1414     v  = aa + diag[i] - 1;
1415     vi = aj + diag[i] - 1;
1416     nz = diag[i] - ai[i];
1417     s1 = tmp[i];
1418     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1419   }
1420 
1421   /* copy tmp into x according to permutation */
1422   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1423 
1424   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1425   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1426   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1427   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1428 
1429   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1435 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1436 {
1437   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1438   IS                iscol = a->col,isrow = a->row;
1439   PetscErrorCode    ierr;
1440   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1441   PetscInt          i,n = A->rmap->n,j;
1442   PetscInt          nz;
1443   PetscScalar       *x,*tmp,s1;
1444   const MatScalar   *aa = a->a,*v;
1445   const PetscScalar *b;
1446 
1447   PetscFunctionBegin;
1448   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1449   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1450   tmp  = a->solve_work;
1451 
1452   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1453   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1454 
1455   /* copy the b into temp work space according to permutation */
1456   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1457 
1458   /* forward solve the U^T */
1459   for (i=0; i<n; i++) {
1460     v   = aa + adiag[i+1] + 1;
1461     vi  = aj + adiag[i+1] + 1;
1462     nz  = adiag[i] - adiag[i+1] - 1;
1463     s1  = tmp[i];
1464     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1465     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1466     tmp[i] = s1;
1467   }
1468 
1469   /* backward solve the L^T */
1470   for (i=n-1; i>=0; i--) {
1471     v  = aa + ai[i];
1472     vi = aj + ai[i];
1473     nz = ai[i+1] - ai[i];
1474     s1 = tmp[i];
1475     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1476   }
1477 
1478   /* copy tmp into x according to permutation */
1479   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1480 
1481   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1482   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1483   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1484   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1485 
1486   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1492 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1493 {
1494   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1495   IS                iscol = a->col,isrow = a->row;
1496   PetscErrorCode    ierr;
1497   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1498   PetscInt          i,n = A->rmap->n,j;
1499   PetscInt          nz;
1500   PetscScalar       *x,*tmp,s1;
1501   const MatScalar   *aa = a->a,*v;
1502   const PetscScalar *b;
1503 
1504   PetscFunctionBegin;
1505   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1506   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1507   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1508   tmp  = a->solve_work;
1509 
1510   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1511   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1512 
1513   /* copy the b into temp work space according to permutation */
1514   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1515 
1516   /* forward solve the U^T */
1517   for (i=0; i<n; i++) {
1518     v   = aa + diag[i];
1519     vi  = aj + diag[i] + 1;
1520     nz  = ai[i+1] - diag[i] - 1;
1521     s1  = tmp[i];
1522     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1523     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1524     tmp[i] = s1;
1525   }
1526 
1527   /* backward solve the L^T */
1528   for (i=n-1; i>=0; i--) {
1529     v  = aa + diag[i] - 1;
1530     vi = aj + diag[i] - 1;
1531     nz = diag[i] - ai[i];
1532     s1 = tmp[i];
1533     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1534   }
1535 
1536   /* copy tmp into x according to permutation */
1537   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1538 
1539   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1540   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1541   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1543 
1544   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1550 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1551 {
1552   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1553   IS                iscol = a->col,isrow = a->row;
1554   PetscErrorCode    ierr;
1555   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1556   PetscInt          i,n = A->rmap->n,j;
1557   PetscInt          nz;
1558   PetscScalar       *x,*tmp,s1;
1559   const MatScalar   *aa = a->a,*v;
1560   const PetscScalar *b;
1561 
1562   PetscFunctionBegin;
1563   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1564   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1565   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1566   tmp  = a->solve_work;
1567 
1568   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1569   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1570 
1571   /* copy the b into temp work space according to permutation */
1572   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1573 
1574   /* forward solve the U^T */
1575   for (i=0; i<n; i++) {
1576     v   = aa + adiag[i+1] + 1;
1577     vi  = aj + adiag[i+1] + 1;
1578     nz  = adiag[i] - adiag[i+1] - 1;
1579     s1  = tmp[i];
1580     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1581     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1582     tmp[i] = s1;
1583   }
1584 
1585 
1586   /* backward solve the L^T */
1587   for (i=n-1; i>=0; i--) {
1588     v  = aa + ai[i];
1589     vi = aj + ai[i];
1590     nz = ai[i+1] - ai[i];
1591     s1 = tmp[i];
1592     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1593   }
1594 
1595   /* copy tmp into x according to permutation */
1596   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1597 
1598   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1599   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1600   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1601   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1602 
1603   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 /* ----------------------------------------------------------------*/
1608 
1609 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1610 
1611 /*
1612    ilu() under revised new data structure.
1613    Factored arrays bj and ba are stored as
1614      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1615 
1616    bi=fact->i is an array of size n+1, in which
1617    bi+
1618      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1619      bi[n]:  points to L(n-1,n-1)+1
1620 
1621   bdiag=fact->diag is an array of size n+1,in which
1622      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1623      bdiag[n]: points to entry of U(n-1,0)-1
1624 
1625    U(i,:) contains bdiag[i] as its last entry, i.e.,
1626     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1627 */
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1630 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1631 {
1632   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1633   PetscErrorCode ierr;
1634   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1635   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1636   IS             isicol;
1637 
1638   PetscFunctionBegin;
1639   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1640   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1641   b    = (Mat_SeqAIJ*)(fact)->data;
1642 
1643   /* allocate matrix arrays for new data structure */
1644   ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr);
1645   ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1646 
1647   b->singlemalloc = PETSC_TRUE;
1648   if (!b->diag) {
1649     ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr);
1650     ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1651   }
1652   bdiag = b->diag;
1653 
1654   if (n > 0) {
1655     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1656   }
1657 
1658   /* set bi and bj with new data structure */
1659   bi = b->i;
1660   bj = b->j;
1661 
1662   /* L part */
1663   bi[0] = 0;
1664   for (i=0; i<n; i++) {
1665     nz      = adiag[i] - ai[i];
1666     bi[i+1] = bi[i] + nz;
1667     aj      = a->j + ai[i];
1668     for (j=0; j<nz; j++) {
1669       /*   *bj = aj[j]; bj++; */
1670       bj[k++] = aj[j];
1671     }
1672   }
1673 
1674   /* U part */
1675   bdiag[n] = bi[n]-1;
1676   for (i=n-1; i>=0; i--) {
1677     nz = ai[i+1] - adiag[i] - 1;
1678     aj = a->j + adiag[i] + 1;
1679     for (j=0; j<nz; j++) {
1680       /*      *bj = aj[j]; bj++; */
1681       bj[k++] = aj[j];
1682     }
1683     /* diag[i] */
1684     /*    *bj = i; bj++; */
1685     bj[k++]  = i;
1686     bdiag[i] = bdiag[i+1] + nz + 1;
1687   }
1688 
1689   fact->factortype             = MAT_FACTOR_ILU;
1690   fact->info.factor_mallocs    = 0;
1691   fact->info.fill_ratio_given  = info->fill;
1692   fact->info.fill_ratio_needed = 1.0;
1693   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1694   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1695 
1696   b       = (Mat_SeqAIJ*)(fact)->data;
1697   b->row  = isrow;
1698   b->col  = iscol;
1699   b->icol = isicol;
1700   ierr    = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1701   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1702   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1708 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1709 {
1710   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1711   IS                 isicol;
1712   PetscErrorCode     ierr;
1713   const PetscInt     *r,*ic;
1714   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1715   PetscInt           *bi,*cols,nnz,*cols_lvl;
1716   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1717   PetscInt           i,levels,diagonal_fill;
1718   PetscBool          col_identity,row_identity,missing;
1719   PetscReal          f;
1720   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1721   PetscBT            lnkbt;
1722   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1723   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1724   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1725 
1726   PetscFunctionBegin;
1727   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1728   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1729   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1730 
1731   levels = (PetscInt)info->levels;
1732   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1733   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1734   if (!levels && row_identity && col_identity) {
1735     /* special case: ilu(0) with natural ordering */
1736     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1737     if (a->inode.size) {
1738       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1739     }
1740     PetscFunctionReturn(0);
1741   }
1742 
1743   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1744   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1745   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1746 
1747   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1748   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1749   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1750   bi[0] = bdiag[0] = 0;
1751   ierr  = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1752 
1753   /* create a linked list for storing column indices of the active row */
1754   nlnk = n + 1;
1755   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1756 
1757   /* initial FreeSpace size is f*(ai[n]+1) */
1758   f                 = info->fill;
1759   diagonal_fill     = (PetscInt)info->diagonal_fill;
1760   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1761   current_space     = free_space;
1762   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1763   current_space_lvl = free_space_lvl;
1764   for (i=0; i<n; i++) {
1765     nzi = 0;
1766     /* copy current row into linked list */
1767     nnz = ai[r[i]+1] - ai[r[i]];
1768     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1769     cols   = aj + ai[r[i]];
1770     lnk[i] = -1; /* marker to indicate if diagonal exists */
1771     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1772     nzi   += nlnk;
1773 
1774     /* make sure diagonal entry is included */
1775     if (diagonal_fill && lnk[i] == -1) {
1776       fm = n;
1777       while (lnk[fm] < i) fm = lnk[fm];
1778       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1779       lnk[fm]    = i;
1780       lnk_lvl[i] = 0;
1781       nzi++; dcount++;
1782     }
1783 
1784     /* add pivot rows into the active row */
1785     nzbd = 0;
1786     prow = lnk[n];
1787     while (prow < i) {
1788       nnz      = bdiag[prow];
1789       cols     = bj_ptr[prow] + nnz + 1;
1790       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1791       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1792       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1793       nzi     += nlnk;
1794       prow     = lnk[prow];
1795       nzbd++;
1796     }
1797     bdiag[i] = nzbd;
1798     bi[i+1]  = bi[i] + nzi;
1799     /* if free space is not available, make more free space */
1800     if (current_space->local_remaining<nzi) {
1801       nnz  = 2*nzi*(n - i); /* estimated and max additional space needed */
1802       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1803       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1804       reallocs++;
1805     }
1806 
1807     /* copy data into free_space and free_space_lvl, then initialize lnk */
1808     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1809     bj_ptr[i]    = current_space->array;
1810     bjlvl_ptr[i] = current_space_lvl->array;
1811 
1812     /* make sure the active row i has diagonal entry */
1813     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1814 
1815     current_space->array               += nzi;
1816     current_space->local_used          += nzi;
1817     current_space->local_remaining     -= nzi;
1818     current_space_lvl->array           += nzi;
1819     current_space_lvl->local_used      += nzi;
1820     current_space_lvl->local_remaining -= nzi;
1821   }
1822 
1823   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1824   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1825   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1826   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
1827   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1828 
1829   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1830   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1831   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1832 
1833 #if defined(PETSC_USE_INFO)
1834   {
1835     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1836     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
1837     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
1838     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
1839     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1840     if (diagonal_fill) {
1841       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
1842     }
1843   }
1844 #endif
1845   /* put together the new matrix */
1846   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
1847   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
1848   b    = (Mat_SeqAIJ*)(fact)->data;
1849 
1850   b->free_a       = PETSC_TRUE;
1851   b->free_ij      = PETSC_TRUE;
1852   b->singlemalloc = PETSC_FALSE;
1853 
1854   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
1855 
1856   b->j    = bj;
1857   b->i    = bi;
1858   b->diag = bdiag;
1859   b->ilen = 0;
1860   b->imax = 0;
1861   b->row  = isrow;
1862   b->col  = iscol;
1863   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1864   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1865   b->icol = isicol;
1866 
1867   ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
1868   /* In b structure:  Free imax, ilen, old a, old j.
1869      Allocate bdiag, solve_work, new a, new j */
1870   ierr     = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1871   b->maxnz = b->nz = bdiag[0]+1;
1872 
1873   (fact)->info.factor_mallocs    = reallocs;
1874   (fact)->info.fill_ratio_given  = f;
1875   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1876   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1877   if (a->inode.size) {
1878     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1879   }
1880   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1886 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1887 {
1888   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1889   IS                 isicol;
1890   PetscErrorCode     ierr;
1891   const PetscInt     *r,*ic;
1892   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1893   PetscInt           *bi,*cols,nnz,*cols_lvl;
1894   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1895   PetscInt           i,levels,diagonal_fill;
1896   PetscBool          col_identity,row_identity;
1897   PetscReal          f;
1898   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1899   PetscBT            lnkbt;
1900   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1901   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1902   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1903   PetscBool          missing;
1904 
1905   PetscFunctionBegin;
1906   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1907   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1908   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1909 
1910   f             = info->fill;
1911   levels        = (PetscInt)info->levels;
1912   diagonal_fill = (PetscInt)info->diagonal_fill;
1913 
1914   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1915 
1916   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1917   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1918   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1919     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1920 
1921     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1922     if (a->inode.size) {
1923       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1924     }
1925     fact->factortype               = MAT_FACTOR_ILU;
1926     (fact)->info.factor_mallocs    = 0;
1927     (fact)->info.fill_ratio_given  = info->fill;
1928     (fact)->info.fill_ratio_needed = 1.0;
1929 
1930     b    = (Mat_SeqAIJ*)(fact)->data;
1931     b->row  = isrow;
1932     b->col  = iscol;
1933     b->icol = isicol;
1934     ierr    = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1935     ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1936     ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1937     PetscFunctionReturn(0);
1938   }
1939 
1940   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1941   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1942 
1943   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1944   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1945   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1946   bi[0] = bdiag[0] = 0;
1947 
1948   ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1949 
1950   /* create a linked list for storing column indices of the active row */
1951   nlnk = n + 1;
1952   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1953 
1954   /* initial FreeSpace size is f*(ai[n]+1) */
1955   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1956   current_space     = free_space;
1957   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1958   current_space_lvl = free_space_lvl;
1959 
1960   for (i=0; i<n; i++) {
1961     nzi = 0;
1962     /* copy current row into linked list */
1963     nnz = ai[r[i]+1] - ai[r[i]];
1964     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1965     cols   = aj + ai[r[i]];
1966     lnk[i] = -1; /* marker to indicate if diagonal exists */
1967     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1968     nzi   += nlnk;
1969 
1970     /* make sure diagonal entry is included */
1971     if (diagonal_fill && lnk[i] == -1) {
1972       fm = n;
1973       while (lnk[fm] < i) fm = lnk[fm];
1974       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1975       lnk[fm]    = i;
1976       lnk_lvl[i] = 0;
1977       nzi++; dcount++;
1978     }
1979 
1980     /* add pivot rows into the active row */
1981     nzbd = 0;
1982     prow = lnk[n];
1983     while (prow < i) {
1984       nnz      = bdiag[prow];
1985       cols     = bj_ptr[prow] + nnz + 1;
1986       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1987       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1988       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1989       nzi     += nlnk;
1990       prow     = lnk[prow];
1991       nzbd++;
1992     }
1993     bdiag[i] = nzbd;
1994     bi[i+1]  = bi[i] + nzi;
1995 
1996     /* if free space is not available, make more free space */
1997     if (current_space->local_remaining<nzi) {
1998       nnz  = nzi*(n - i); /* estimated and max additional space needed */
1999       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2000       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2001       reallocs++;
2002     }
2003 
2004     /* copy data into free_space and free_space_lvl, then initialize lnk */
2005     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2006     bj_ptr[i]    = current_space->array;
2007     bjlvl_ptr[i] = current_space_lvl->array;
2008 
2009     /* make sure the active row i has diagonal entry */
2010     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2011 
2012     current_space->array               += nzi;
2013     current_space->local_used          += nzi;
2014     current_space->local_remaining     -= nzi;
2015     current_space_lvl->array           += nzi;
2016     current_space_lvl->local_used      += nzi;
2017     current_space_lvl->local_remaining -= nzi;
2018   }
2019 
2020   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2021   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2022 
2023   /* destroy list of free space and other temporary arrays */
2024   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
2025   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2026   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2027   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2028   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2029 
2030 #if defined(PETSC_USE_INFO)
2031   {
2032     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2033     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
2034     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2035     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
2036     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2037     if (diagonal_fill) {
2038       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
2039     }
2040   }
2041 #endif
2042 
2043   /* put together the new matrix */
2044   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2045   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
2046   b    = (Mat_SeqAIJ*)(fact)->data;
2047 
2048   b->free_a       = PETSC_TRUE;
2049   b->free_ij      = PETSC_TRUE;
2050   b->singlemalloc = PETSC_FALSE;
2051 
2052   ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr);
2053   b->j = bj;
2054   b->i = bi;
2055   for (i=0; i<n; i++) bdiag[i] += bi[i];
2056   b->diag = bdiag;
2057   b->ilen = 0;
2058   b->imax = 0;
2059   b->row  = isrow;
2060   b->col  = iscol;
2061   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2062   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2063   b->icol = isicol;
2064   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
2065   /* In b structure:  Free imax, ilen, old a, old j.
2066      Allocate bdiag, solve_work, new a, new j */
2067   ierr     = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2068   b->maxnz = b->nz = bi[n];
2069 
2070   (fact)->info.factor_mallocs    = reallocs;
2071   (fact)->info.fill_ratio_given  = f;
2072   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2073   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2074   if (a->inode.size) {
2075     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2076   }
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 #undef __FUNCT__
2081 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2082 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2083 {
2084   Mat            C = B;
2085   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2086   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2087   IS             ip=b->row,iip = b->icol;
2088   PetscErrorCode ierr;
2089   const PetscInt *rip,*riip;
2090   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2091   PetscInt       *ai=a->i,*aj=a->j;
2092   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2093   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2094   PetscBool      perm_identity;
2095   FactorShiftCtx sctx;
2096   PetscReal      rs;
2097   MatScalar      d,*v;
2098 
2099   PetscFunctionBegin;
2100   /* MatPivotSetUp(): initialize shift context sctx */
2101   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2102 
2103   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2104     sctx.shift_top = info->zeropivot;
2105     for (i=0; i<mbs; i++) {
2106       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2107       d  = (aa)[a->diag[i]];
2108       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2109       v  = aa+ai[i];
2110       nz = ai[i+1] - ai[i];
2111       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2112       if (rs>sctx.shift_top) sctx.shift_top = rs;
2113     }
2114     sctx.shift_top *= 1.1;
2115     sctx.nshift_max = 5;
2116     sctx.shift_lo   = 0.;
2117     sctx.shift_hi   = 1.;
2118   }
2119 
2120   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2121   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2122 
2123   /* allocate working arrays
2124      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2125      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2126   */
2127   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr);
2128 
2129   do {
2130     sctx.newshift = PETSC_FALSE;
2131 
2132     for (i=0; i<mbs; i++) c2r[i] = mbs;
2133     if (mbs) il[0] = 0;
2134 
2135     for (k = 0; k<mbs; k++) {
2136       /* zero rtmp */
2137       nz    = bi[k+1] - bi[k];
2138       bjtmp = bj + bi[k];
2139       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2140 
2141       /* load in initial unfactored row */
2142       bval = ba + bi[k];
2143       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2144       for (j = jmin; j < jmax; j++) {
2145         col = riip[aj[j]];
2146         if (col >= k) { /* only take upper triangular entry */
2147           rtmp[col] = aa[j];
2148           *bval++   = 0.0; /* for in-place factorization */
2149         }
2150       }
2151       /* shift the diagonal of the matrix: ZeropivotApply() */
2152       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2153 
2154       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155       dk = rtmp[k];
2156       i  = c2r[k]; /* first row to be added to k_th row  */
2157 
2158       while (i < k) {
2159         nexti = c2r[i]; /* next row to be added to k_th row */
2160 
2161         /* compute multiplier, update diag(k) and U(i,k) */
2162         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2163         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2164         dk     += uikdi*ba[ili]; /* update diag[k] */
2165         ba[ili] = uikdi; /* -U(i,k) */
2166 
2167         /* add multiple of row i to k-th row */
2168         jmin = ili + 1; jmax = bi[i+1];
2169         if (jmin < jmax) {
2170           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2171           /* update il and c2r for row i */
2172           il[i] = jmin;
2173           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2174         }
2175         i = nexti;
2176       }
2177 
2178       /* copy data into U(k,:) */
2179       rs   = 0.0;
2180       jmin = bi[k]; jmax = bi[k+1]-1;
2181       if (jmin < jmax) {
2182         for (j=jmin; j<jmax; j++) {
2183           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2184         }
2185         /* add the k-th row into il and c2r */
2186         il[k] = jmin;
2187         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2188       }
2189 
2190       /* MatPivotCheck() */
2191       sctx.rs = rs;
2192       sctx.pv = dk;
2193       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2194       if (sctx.newshift) break;
2195       dk = sctx.pv;
2196 
2197       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2198     }
2199   } while (sctx.newshift);
2200 
2201   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2202   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2203   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2204 
2205   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2206   if (perm_identity) {
2207     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2208     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2209     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2210     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2211   } else {
2212     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2213     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2214     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2215     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2216   }
2217 
2218   C->assembled    = PETSC_TRUE;
2219   C->preallocated = PETSC_TRUE;
2220 
2221   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2222 
2223   /* MatPivotView() */
2224   if (sctx.nshift) {
2225     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2226       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
2227     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2228       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2229     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2230       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
2231     }
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2238 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2239 {
2240   Mat            C = B;
2241   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2242   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2243   IS             ip=b->row,iip = b->icol;
2244   PetscErrorCode ierr;
2245   const PetscInt *rip,*riip;
2246   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2247   PetscInt       *ai=a->i,*aj=a->j;
2248   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2249   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2250   PetscBool      perm_identity;
2251   FactorShiftCtx sctx;
2252   PetscReal      rs;
2253   MatScalar      d,*v;
2254 
2255   PetscFunctionBegin;
2256   /* MatPivotSetUp(): initialize shift context sctx */
2257   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2258 
2259   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2260     sctx.shift_top = info->zeropivot;
2261     for (i=0; i<mbs; i++) {
2262       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2263       d  = (aa)[a->diag[i]];
2264       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2265       v  = aa+ai[i];
2266       nz = ai[i+1] - ai[i];
2267       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2268       if (rs>sctx.shift_top) sctx.shift_top = rs;
2269     }
2270     sctx.shift_top *= 1.1;
2271     sctx.nshift_max = 5;
2272     sctx.shift_lo   = 0.;
2273     sctx.shift_hi   = 1.;
2274   }
2275 
2276   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2277   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2278 
2279   /* initialization */
2280   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr);
2281 
2282   do {
2283     sctx.newshift = PETSC_FALSE;
2284 
2285     for (i=0; i<mbs; i++) jl[i] = mbs;
2286     il[0] = 0;
2287 
2288     for (k = 0; k<mbs; k++) {
2289       /* zero rtmp */
2290       nz    = bi[k+1] - bi[k];
2291       bjtmp = bj + bi[k];
2292       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2293 
2294       bval = ba + bi[k];
2295       /* initialize k-th row by the perm[k]-th row of A */
2296       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2297       for (j = jmin; j < jmax; j++) {
2298         col = riip[aj[j]];
2299         if (col >= k) { /* only take upper triangular entry */
2300           rtmp[col] = aa[j];
2301           *bval++   = 0.0; /* for in-place factorization */
2302         }
2303       }
2304       /* shift the diagonal of the matrix */
2305       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2306 
2307       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2308       dk = rtmp[k];
2309       i  = jl[k]; /* first row to be added to k_th row  */
2310 
2311       while (i < k) {
2312         nexti = jl[i]; /* next row to be added to k_th row */
2313 
2314         /* compute multiplier, update diag(k) and U(i,k) */
2315         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2316         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2317         dk     += uikdi*ba[ili];
2318         ba[ili] = uikdi; /* -U(i,k) */
2319 
2320         /* add multiple of row i to k-th row */
2321         jmin = ili + 1; jmax = bi[i+1];
2322         if (jmin < jmax) {
2323           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2324           /* update il and jl for row i */
2325           il[i] = jmin;
2326           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2327         }
2328         i = nexti;
2329       }
2330 
2331       /* shift the diagonals when zero pivot is detected */
2332       /* compute rs=sum of abs(off-diagonal) */
2333       rs   = 0.0;
2334       jmin = bi[k]+1;
2335       nz   = bi[k+1] - jmin;
2336       bcol = bj + jmin;
2337       for (j=0; j<nz; j++) {
2338         rs += PetscAbsScalar(rtmp[bcol[j]]);
2339       }
2340 
2341       sctx.rs = rs;
2342       sctx.pv = dk;
2343       ierr    = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2344       if (sctx.newshift) break;
2345       dk = sctx.pv;
2346 
2347       /* copy data into U(k,:) */
2348       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2349       jmin      = bi[k]+1; jmax = bi[k+1];
2350       if (jmin < jmax) {
2351         for (j=jmin; j<jmax; j++) {
2352           col = bj[j]; ba[j] = rtmp[col];
2353         }
2354         /* add the k-th row into il and jl */
2355         il[k] = jmin;
2356         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2357       }
2358     }
2359   } while (sctx.newshift);
2360 
2361   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2362   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2363   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2364 
2365   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2366   if (perm_identity) {
2367     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371   } else {
2372     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2373     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2374     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2375     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2376   }
2377 
2378   C->assembled    = PETSC_TRUE;
2379   C->preallocated = PETSC_TRUE;
2380 
2381   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2382   if (sctx.nshift) {
2383     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2384       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2385     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2386       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2387     }
2388   }
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 /*
2393    icc() under revised new data structure.
2394    Factored arrays bj and ba are stored as
2395      U(0,:),...,U(i,:),U(n-1,:)
2396 
2397    ui=fact->i is an array of size n+1, in which
2398    ui+
2399      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2400      ui[n]:  points to U(n-1,n-1)+1
2401 
2402   udiag=fact->diag is an array of size n,in which
2403      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2404 
2405    U(i,:) contains udiag[i] as its last entry, i.e.,
2406     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2407 */
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2411 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2412 {
2413   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2414   Mat_SeqSBAIJ       *b;
2415   PetscErrorCode     ierr;
2416   PetscBool          perm_identity,missing;
2417   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2418   const PetscInt     *rip,*riip;
2419   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2420   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2421   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2422   PetscReal          fill          =info->fill,levels=info->levels;
2423   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2424   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2425   PetscBT            lnkbt;
2426   IS                 iperm;
2427 
2428   PetscFunctionBegin;
2429   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2430   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2431   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2432   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2433   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2434 
2435   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2436   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2437   ui[0] = 0;
2438 
2439   /* ICC(0) without matrix ordering: simply rearrange column indices */
2440   if (!levels && perm_identity) {
2441     for (i=0; i<am; i++) {
2442       ncols    = ai[i+1] - a->diag[i];
2443       ui[i+1]  = ui[i] + ncols;
2444       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2445     }
2446     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2447     cols = uj;
2448     for (i=0; i<am; i++) {
2449       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2450       ncols = ai[i+1] - a->diag[i] -1;
2451       for (j=0; j<ncols; j++) *cols++ = aj[j];
2452       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2453     }
2454   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2455     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2456     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2457 
2458     /* initialization */
2459     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2460 
2461     /* jl: linked list for storing indices of the pivot rows
2462        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2463     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2464     for (i=0; i<am; i++) {
2465       jl[i] = am; il[i] = 0;
2466     }
2467 
2468     /* create and initialize a linked list for storing column indices of the active row k */
2469     nlnk = am + 1;
2470     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2471 
2472     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2473     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2474     current_space     = free_space;
2475     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2476     current_space_lvl = free_space_lvl;
2477 
2478     for (k=0; k<am; k++) {  /* for each active row k */
2479       /* initialize lnk by the column indices of row rip[k] of A */
2480       nzk   = 0;
2481       ncols = ai[rip[k]+1] - ai[rip[k]];
2482       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2483       ncols_upper = 0;
2484       for (j=0; j<ncols; j++) {
2485         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2486         if (riip[i] >= k) { /* only take upper triangular entry */
2487           ajtmp[ncols_upper] = i;
2488           ncols_upper++;
2489         }
2490       }
2491       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2492       nzk += nlnk;
2493 
2494       /* update lnk by computing fill-in for each pivot row to be merged in */
2495       prow = jl[k]; /* 1st pivot row */
2496 
2497       while (prow < k) {
2498         nextprow = jl[prow];
2499 
2500         /* merge prow into k-th row */
2501         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2502         jmax  = ui[prow+1];
2503         ncols = jmax-jmin;
2504         i     = jmin - ui[prow];
2505         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2506         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2507         j     = *(uj - 1);
2508         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2509         nzk  += nlnk;
2510 
2511         /* update il and jl for prow */
2512         if (jmin < jmax) {
2513           il[prow] = jmin;
2514           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2515         }
2516         prow = nextprow;
2517       }
2518 
2519       /* if free space is not available, make more free space */
2520       if (current_space->local_remaining<nzk) {
2521         i    = am - k + 1; /* num of unfactored rows */
2522         i   *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2523         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2524         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2525         reallocs++;
2526       }
2527 
2528       /* copy data into free_space and free_space_lvl, then initialize lnk */
2529       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2530       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2531 
2532       /* add the k-th row into il and jl */
2533       if (nzk > 1) {
2534         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2535         jl[k] = jl[i]; jl[i] = k;
2536         il[k] = ui[k] + 1;
2537       }
2538       uj_ptr[k]     = current_space->array;
2539       uj_lvl_ptr[k] = current_space_lvl->array;
2540 
2541       current_space->array           += nzk;
2542       current_space->local_used      += nzk;
2543       current_space->local_remaining -= nzk;
2544 
2545       current_space_lvl->array           += nzk;
2546       current_space_lvl->local_used      += nzk;
2547       current_space_lvl->local_remaining -= nzk;
2548 
2549       ui[k+1] = ui[k] + nzk;
2550     }
2551 
2552     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2553     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2554     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2555     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2556 
2557     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2558     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2559     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2560     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2561     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2562 
2563   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2564 
2565   /* put together the new matrix in MATSEQSBAIJ format */
2566   b               = (Mat_SeqSBAIJ*)(fact)->data;
2567   b->singlemalloc = PETSC_FALSE;
2568 
2569   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2570 
2571   b->j             = uj;
2572   b->i             = ui;
2573   b->diag          = udiag;
2574   b->free_diag     = PETSC_TRUE;
2575   b->ilen          = 0;
2576   b->imax          = 0;
2577   b->row           = perm;
2578   b->col           = perm;
2579   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2580   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2581   b->icol          = iperm;
2582   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2583 
2584   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2585   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2586 
2587   b->maxnz   = b->nz = ui[am];
2588   b->free_a  = PETSC_TRUE;
2589   b->free_ij = PETSC_TRUE;
2590 
2591   fact->info.factor_mallocs   = reallocs;
2592   fact->info.fill_ratio_given = fill;
2593   if (ai[am] != 0) {
2594     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2595     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2596   } else {
2597     fact->info.fill_ratio_needed = 0.0;
2598   }
2599 #if defined(PETSC_USE_INFO)
2600   if (ai[am] != 0) {
2601     PetscReal af = fact->info.fill_ratio_needed;
2602     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2603     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2604     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2605   } else {
2606     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2607   }
2608 #endif
2609   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2615 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2616 {
2617   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2618   Mat_SeqSBAIJ       *b;
2619   PetscErrorCode     ierr;
2620   PetscBool          perm_identity,missing;
2621   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2622   const PetscInt     *rip,*riip;
2623   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2624   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2625   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2626   PetscReal          fill          =info->fill,levels=info->levels;
2627   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2628   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2629   PetscBT            lnkbt;
2630   IS                 iperm;
2631 
2632   PetscFunctionBegin;
2633   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2634   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2635   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2636   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2637   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2638 
2639   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2640   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2641   ui[0] = 0;
2642 
2643   /* ICC(0) without matrix ordering: simply copies fill pattern */
2644   if (!levels && perm_identity) {
2645 
2646     for (i=0; i<am; i++) {
2647       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2648       udiag[i] = ui[i];
2649     }
2650     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2651     cols = uj;
2652     for (i=0; i<am; i++) {
2653       aj    = a->j + a->diag[i];
2654       ncols = ui[i+1] - ui[i];
2655       for (j=0; j<ncols; j++) *cols++ = *aj++;
2656     }
2657   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2658     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2659     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2660 
2661     /* initialization */
2662     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2663 
2664     /* jl: linked list for storing indices of the pivot rows
2665        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2666     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2667     for (i=0; i<am; i++) {
2668       jl[i] = am; il[i] = 0;
2669     }
2670 
2671     /* create and initialize a linked list for storing column indices of the active row k */
2672     nlnk = am + 1;
2673     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2674 
2675     /* initial FreeSpace size is fill*(ai[am]+1) */
2676     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2677     current_space     = free_space;
2678     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2679     current_space_lvl = free_space_lvl;
2680 
2681     for (k=0; k<am; k++) {  /* for each active row k */
2682       /* initialize lnk by the column indices of row rip[k] of A */
2683       nzk   = 0;
2684       ncols = ai[rip[k]+1] - ai[rip[k]];
2685       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2686       ncols_upper = 0;
2687       for (j=0; j<ncols; j++) {
2688         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2689         if (riip[i] >= k) { /* only take upper triangular entry */
2690           ajtmp[ncols_upper] = i;
2691           ncols_upper++;
2692         }
2693       }
2694       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2695       nzk += nlnk;
2696 
2697       /* update lnk by computing fill-in for each pivot row to be merged in */
2698       prow = jl[k]; /* 1st pivot row */
2699 
2700       while (prow < k) {
2701         nextprow = jl[prow];
2702 
2703         /* merge prow into k-th row */
2704         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2705         jmax  = ui[prow+1];
2706         ncols = jmax-jmin;
2707         i     = jmin - ui[prow];
2708         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2709         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2710         j     = *(uj - 1);
2711         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2712         nzk  += nlnk;
2713 
2714         /* update il and jl for prow */
2715         if (jmin < jmax) {
2716           il[prow] = jmin;
2717           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2718         }
2719         prow = nextprow;
2720       }
2721 
2722       /* if free space is not available, make more free space */
2723       if (current_space->local_remaining<nzk) {
2724         i    = am - k + 1; /* num of unfactored rows */
2725         i   *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2726         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2727         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2728         reallocs++;
2729       }
2730 
2731       /* copy data into free_space and free_space_lvl, then initialize lnk */
2732       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2733       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2734 
2735       /* add the k-th row into il and jl */
2736       if (nzk > 1) {
2737         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2738         jl[k] = jl[i]; jl[i] = k;
2739         il[k] = ui[k] + 1;
2740       }
2741       uj_ptr[k]     = current_space->array;
2742       uj_lvl_ptr[k] = current_space_lvl->array;
2743 
2744       current_space->array           += nzk;
2745       current_space->local_used      += nzk;
2746       current_space->local_remaining -= nzk;
2747 
2748       current_space_lvl->array           += nzk;
2749       current_space_lvl->local_used      += nzk;
2750       current_space_lvl->local_remaining -= nzk;
2751 
2752       ui[k+1] = ui[k] + nzk;
2753     }
2754 
2755 #if defined(PETSC_USE_INFO)
2756     if (ai[am] != 0) {
2757       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2758       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2759       ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2760       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2761     } else {
2762       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2763     }
2764 #endif
2765 
2766     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2767     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2768     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2769     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2770 
2771     /* destroy list of free space and other temporary array(s) */
2772     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2773     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2774     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2775     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2776 
2777   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2778 
2779   /* put together the new matrix in MATSEQSBAIJ format */
2780 
2781   b               = (Mat_SeqSBAIJ*)fact->data;
2782   b->singlemalloc = PETSC_FALSE;
2783 
2784   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2785 
2786   b->j         = uj;
2787   b->i         = ui;
2788   b->diag      = udiag;
2789   b->free_diag = PETSC_TRUE;
2790   b->ilen      = 0;
2791   b->imax      = 0;
2792   b->row       = perm;
2793   b->col       = perm;
2794 
2795   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2796   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2797 
2798   b->icol          = iperm;
2799   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2800   ierr             = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2801   ierr             = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2802   b->maxnz         = b->nz = ui[am];
2803   b->free_a        = PETSC_TRUE;
2804   b->free_ij       = PETSC_TRUE;
2805 
2806   fact->info.factor_mallocs   = reallocs;
2807   fact->info.fill_ratio_given = fill;
2808   if (ai[am] != 0) {
2809     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2810   } else {
2811     fact->info.fill_ratio_needed = 0.0;
2812   }
2813   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2819 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2820 {
2821   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2822   Mat_SeqSBAIJ       *b;
2823   PetscErrorCode     ierr;
2824   PetscBool          perm_identity,missing;
2825   PetscReal          fill = info->fill;
2826   const PetscInt     *rip,*riip;
2827   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2828   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2829   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2830   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2831   PetscBT            lnkbt;
2832   IS                 iperm;
2833 
2834   PetscFunctionBegin;
2835   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2836   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2837   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2838 
2839   /* check whether perm is the identity mapping */
2840   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2841   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2842   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2843   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2844 
2845   /* initialization */
2846   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2847   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2848   ui[0] = 0;
2849 
2850   /* jl: linked list for storing indices of the pivot rows
2851      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2852   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
2853   for (i=0; i<am; i++) {
2854     jl[i] = am; il[i] = 0;
2855   }
2856 
2857   /* create and initialize a linked list for storing column indices of the active row k */
2858   nlnk = am + 1;
2859   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2860 
2861   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2862   ierr          = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2863   current_space = free_space;
2864 
2865   for (k=0; k<am; k++) {  /* for each active row k */
2866     /* initialize lnk by the column indices of row rip[k] of A */
2867     nzk   = 0;
2868     ncols = ai[rip[k]+1] - ai[rip[k]];
2869     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2870     ncols_upper = 0;
2871     for (j=0; j<ncols; j++) {
2872       i = riip[*(aj + ai[rip[k]] + j)];
2873       if (i >= k) { /* only take upper triangular entry */
2874         cols[ncols_upper] = i;
2875         ncols_upper++;
2876       }
2877     }
2878     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2879     nzk += nlnk;
2880 
2881     /* update lnk by computing fill-in for each pivot row to be merged in */
2882     prow = jl[k]; /* 1st pivot row */
2883 
2884     while (prow < k) {
2885       nextprow = jl[prow];
2886       /* merge prow into k-th row */
2887       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2888       jmax   = ui[prow+1];
2889       ncols  = jmax-jmin;
2890       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2891       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2892       nzk   += nlnk;
2893 
2894       /* update il and jl for prow */
2895       if (jmin < jmax) {
2896         il[prow] = jmin;
2897         j        = *uj_ptr;
2898         jl[prow] = jl[j];
2899         jl[j]    = prow;
2900       }
2901       prow = nextprow;
2902     }
2903 
2904     /* if free space is not available, make more free space */
2905     if (current_space->local_remaining<nzk) {
2906       i    = am - k + 1; /* num of unfactored rows */
2907       i   *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2908       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2909       reallocs++;
2910     }
2911 
2912     /* copy data into free space, then initialize lnk */
2913     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2914 
2915     /* add the k-th row into il and jl */
2916     if (nzk > 1) {
2917       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2918       jl[k] = jl[i]; jl[i] = k;
2919       il[k] = ui[k] + 1;
2920     }
2921     ui_ptr[k] = current_space->array;
2922 
2923     current_space->array           += nzk;
2924     current_space->local_used      += nzk;
2925     current_space->local_remaining -= nzk;
2926 
2927     ui[k+1] = ui[k] + nzk;
2928   }
2929 
2930   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2931   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2932   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2933 
2934   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2935   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2936   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2937   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2938 
2939   /* put together the new matrix in MATSEQSBAIJ format */
2940 
2941   b               = (Mat_SeqSBAIJ*)fact->data;
2942   b->singlemalloc = PETSC_FALSE;
2943   b->free_a       = PETSC_TRUE;
2944   b->free_ij      = PETSC_TRUE;
2945 
2946   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2947 
2948   b->j         = uj;
2949   b->i         = ui;
2950   b->diag      = udiag;
2951   b->free_diag = PETSC_TRUE;
2952   b->ilen      = 0;
2953   b->imax      = 0;
2954   b->row       = perm;
2955   b->col       = perm;
2956 
2957   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2958   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2959 
2960   b->icol          = iperm;
2961   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2962 
2963   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2964   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2965 
2966   b->maxnz = b->nz = ui[am];
2967 
2968   fact->info.factor_mallocs   = reallocs;
2969   fact->info.fill_ratio_given = fill;
2970   if (ai[am] != 0) {
2971     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2972     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2973   } else {
2974     fact->info.fill_ratio_needed = 0.0;
2975   }
2976 #if defined(PETSC_USE_INFO)
2977   if (ai[am] != 0) {
2978     PetscReal af = fact->info.fill_ratio_needed;
2979     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2980     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2981     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2982   } else {
2983     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2984   }
2985 #endif
2986   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2992 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2993 {
2994   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2995   Mat_SeqSBAIJ       *b;
2996   PetscErrorCode     ierr;
2997   PetscBool          perm_identity,missing;
2998   PetscReal          fill = info->fill;
2999   const PetscInt     *rip,*riip;
3000   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3001   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3002   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3003   PetscFreeSpaceList free_space=NULL,current_space=NULL;
3004   PetscBT            lnkbt;
3005   IS                 iperm;
3006 
3007   PetscFunctionBegin;
3008   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3009   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3010   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3011 
3012   /* check whether perm is the identity mapping */
3013   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
3014   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
3015   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
3016   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
3017 
3018   /* initialization */
3019   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
3020   ui[0] = 0;
3021 
3022   /* jl: linked list for storing indices of the pivot rows
3023      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3024   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
3025   for (i=0; i<am; i++) {
3026     jl[i] = am; il[i] = 0;
3027   }
3028 
3029   /* create and initialize a linked list for storing column indices of the active row k */
3030   nlnk = am + 1;
3031   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3032 
3033   /* initial FreeSpace size is fill*(ai[am]+1) */
3034   ierr          = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3035   current_space = free_space;
3036 
3037   for (k=0; k<am; k++) {  /* for each active row k */
3038     /* initialize lnk by the column indices of row rip[k] of A */
3039     nzk   = 0;
3040     ncols = ai[rip[k]+1] - ai[rip[k]];
3041     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3042     ncols_upper = 0;
3043     for (j=0; j<ncols; j++) {
3044       i = riip[*(aj + ai[rip[k]] + j)];
3045       if (i >= k) { /* only take upper triangular entry */
3046         cols[ncols_upper] = i;
3047         ncols_upper++;
3048       }
3049     }
3050     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3051     nzk += nlnk;
3052 
3053     /* update lnk by computing fill-in for each pivot row to be merged in */
3054     prow = jl[k]; /* 1st pivot row */
3055 
3056     while (prow < k) {
3057       nextprow = jl[prow];
3058       /* merge prow into k-th row */
3059       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3060       jmax   = ui[prow+1];
3061       ncols  = jmax-jmin;
3062       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3063       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3064       nzk   += nlnk;
3065 
3066       /* update il and jl for prow */
3067       if (jmin < jmax) {
3068         il[prow] = jmin;
3069         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3070       }
3071       prow = nextprow;
3072     }
3073 
3074     /* if free space is not available, make more free space */
3075     if (current_space->local_remaining<nzk) {
3076       i    = am - k + 1; /* num of unfactored rows */
3077       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3078       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3079       reallocs++;
3080     }
3081 
3082     /* copy data into free space, then initialize lnk */
3083     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3084 
3085     /* add the k-th row into il and jl */
3086     if (nzk-1 > 0) {
3087       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3088       jl[k] = jl[i]; jl[i] = k;
3089       il[k] = ui[k] + 1;
3090     }
3091     ui_ptr[k] = current_space->array;
3092 
3093     current_space->array           += nzk;
3094     current_space->local_used      += nzk;
3095     current_space->local_remaining -= nzk;
3096 
3097     ui[k+1] = ui[k] + nzk;
3098   }
3099 
3100 #if defined(PETSC_USE_INFO)
3101   if (ai[am] != 0) {
3102     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3103     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
3104     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
3105     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
3106   } else {
3107     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3108   }
3109 #endif
3110 
3111   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3112   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3113   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3114 
3115   /* destroy list of free space and other temporary array(s) */
3116   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
3117   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3118   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3119 
3120   /* put together the new matrix in MATSEQSBAIJ format */
3121 
3122   b               = (Mat_SeqSBAIJ*)fact->data;
3123   b->singlemalloc = PETSC_FALSE;
3124   b->free_a       = PETSC_TRUE;
3125   b->free_ij      = PETSC_TRUE;
3126 
3127   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
3128 
3129   b->j    = uj;
3130   b->i    = ui;
3131   b->diag = 0;
3132   b->ilen = 0;
3133   b->imax = 0;
3134   b->row  = perm;
3135   b->col  = perm;
3136 
3137   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3138   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3139 
3140   b->icol          = iperm;
3141   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3142 
3143   ierr     = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
3144   ierr     = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3145   b->maxnz = b->nz = ui[am];
3146 
3147   fact->info.factor_mallocs   = reallocs;
3148   fact->info.fill_ratio_given = fill;
3149   if (ai[am] != 0) {
3150     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3151   } else {
3152     fact->info.fill_ratio_needed = 0.0;
3153   }
3154   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3155   PetscFunctionReturn(0);
3156 }
3157 
3158 #undef __FUNCT__
3159 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3160 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3161 {
3162   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3163   PetscErrorCode    ierr;
3164   PetscInt          n   = A->rmap->n;
3165   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3166   PetscScalar       *x,sum;
3167   const PetscScalar *b;
3168   const MatScalar   *aa = a->a,*v;
3169   PetscInt          i,nz;
3170 
3171   PetscFunctionBegin;
3172   if (!n) PetscFunctionReturn(0);
3173 
3174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3175   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3176 
3177   /* forward solve the lower triangular */
3178   x[0] = b[0];
3179   v    = aa;
3180   vi   = aj;
3181   for (i=1; i<n; i++) {
3182     nz  = ai[i+1] - ai[i];
3183     sum = b[i];
3184     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3185     v   += nz;
3186     vi  += nz;
3187     x[i] = sum;
3188   }
3189 
3190   /* backward solve the upper triangular */
3191   for (i=n-1; i>=0; i--) {
3192     v   = aa + adiag[i+1] + 1;
3193     vi  = aj + adiag[i+1] + 1;
3194     nz  = adiag[i] - adiag[i+1]-1;
3195     sum = x[i];
3196     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3197     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3198   }
3199 
3200   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3201   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3202   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3203   PetscFunctionReturn(0);
3204 }
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatSolve_SeqAIJ"
3208 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3209 {
3210   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3211   IS                iscol = a->col,isrow = a->row;
3212   PetscErrorCode    ierr;
3213   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3214   const PetscInt    *rout,*cout,*r,*c;
3215   PetscScalar       *x,*tmp,sum;
3216   const PetscScalar *b;
3217   const MatScalar   *aa = a->a,*v;
3218 
3219   PetscFunctionBegin;
3220   if (!n) PetscFunctionReturn(0);
3221 
3222   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3223   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3224   tmp  = a->solve_work;
3225 
3226   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3227   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3228 
3229   /* forward solve the lower triangular */
3230   tmp[0] = b[r[0]];
3231   v      = aa;
3232   vi     = aj;
3233   for (i=1; i<n; i++) {
3234     nz  = ai[i+1] - ai[i];
3235     sum = b[r[i]];
3236     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3237     tmp[i] = sum;
3238     v     += nz; vi += nz;
3239   }
3240 
3241   /* backward solve the upper triangular */
3242   for (i=n-1; i>=0; i--) {
3243     v   = aa + adiag[i+1]+1;
3244     vi  = aj + adiag[i+1]+1;
3245     nz  = adiag[i]-adiag[i+1]-1;
3246     sum = tmp[i];
3247     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3248     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3249   }
3250 
3251   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3252   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3253   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3254   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3255   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3261 /*
3262     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3263 */
3264 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3265 {
3266   Mat            B = *fact;
3267   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3268   IS             isicol;
3269   PetscErrorCode ierr;
3270   const PetscInt *r,*ic;
3271   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3272   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3273   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3274   PetscInt       nlnk,*lnk;
3275   PetscBT        lnkbt;
3276   PetscBool      row_identity,icol_identity;
3277   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3278   const PetscInt *ics;
3279   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3280   PetscReal      dt     =info->dt,shift=info->shiftamount;
3281   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3282   PetscBool      missing;
3283 
3284   PetscFunctionBegin;
3285   if (dt      == PETSC_DEFAULT) dt = 0.005;
3286   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3287 
3288   /* ------- symbolic factorization, can be reused ---------*/
3289   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3290   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3291   adiag=a->diag;
3292 
3293   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3294 
3295   /* bdiag is location of diagonal in factor */
3296   ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3297   ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */
3298 
3299   /* allocate row pointers bi */
3300   ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr);
3301 
3302   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3303   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3304   nnz_max = ai[n]+2*n*dtcount+2;
3305 
3306   ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr);
3307   ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr);
3308 
3309   /* put together the new matrix */
3310   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3311   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
3312   b    = (Mat_SeqAIJ*)B->data;
3313 
3314   b->free_a       = PETSC_TRUE;
3315   b->free_ij      = PETSC_TRUE;
3316   b->singlemalloc = PETSC_FALSE;
3317 
3318   b->a    = ba;
3319   b->j    = bj;
3320   b->i    = bi;
3321   b->diag = bdiag;
3322   b->ilen = 0;
3323   b->imax = 0;
3324   b->row  = isrow;
3325   b->col  = iscol;
3326   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3327   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3328   b->icol = isicol;
3329 
3330   ierr     = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
3331   ierr     = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3332   b->maxnz = nnz_max;
3333 
3334   B->factortype            = MAT_FACTOR_ILUDT;
3335   B->info.factor_mallocs   = 0;
3336   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3337   /* ------- end of symbolic factorization ---------*/
3338 
3339   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3340   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3341   ics  = ic;
3342 
3343   /* linked list for storing column indices of the active row */
3344   nlnk = n + 1;
3345   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3346 
3347   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3348   ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr);
3349   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3350   ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr);
3351   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3352 
3353   bi[0]        = 0;
3354   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3355   bdiag_rev[n] = bdiag[0];
3356   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3357   for (i=0; i<n; i++) {
3358     /* copy initial fill into linked list */
3359     nzi = ai[r[i]+1] - ai[r[i]];
3360     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3361     nzi_al = adiag[r[i]] - ai[r[i]];
3362     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3363     ajtmp  = aj + ai[r[i]];
3364     ierr   = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3365 
3366     /* load in initial (unfactored row) */
3367     aatmp = a->a + ai[r[i]];
3368     for (j=0; j<nzi; j++) {
3369       rtmp[ics[*ajtmp++]] = *aatmp++;
3370     }
3371 
3372     /* add pivot rows into linked list */
3373     row = lnk[n];
3374     while (row < i) {
3375       nzi_bl = bi[row+1] - bi[row] + 1;
3376       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3377       ierr   = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3378       nzi   += nlnk;
3379       row    = lnk[row];
3380     }
3381 
3382     /* copy data from lnk into jtmp, then initialize lnk */
3383     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3384 
3385     /* numerical factorization */
3386     bjtmp = jtmp;
3387     row   = *bjtmp++; /* 1st pivot row */
3388     while (row < i) {
3389       pc         = rtmp + row;
3390       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3391       multiplier = (*pc) * (*pv);
3392       *pc        = multiplier;
3393       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3394         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3395         pv = ba + bdiag[row+1] + 1;
3396         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3397         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3398         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3399         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3400       }
3401       row = *bjtmp++;
3402     }
3403 
3404     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3405     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3406     nzi_bl   = 0; j = 0;
3407     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3408       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3409       nzi_bl++; j++;
3410     }
3411     nzi_bu = nzi - nzi_bl -1;
3412     while (j < nzi) {
3413       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3414       j++;
3415     }
3416 
3417     bjtmp = bj + bi[i];
3418     batmp = ba + bi[i];
3419     /* apply level dropping rule to L part */
3420     ncut = nzi_al + dtcount;
3421     if (ncut < nzi_bl) {
3422       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3423       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3424     } else {
3425       ncut = nzi_bl;
3426     }
3427     for (j=0; j<ncut; j++) {
3428       bjtmp[j] = jtmp[j];
3429       batmp[j] = vtmp[j];
3430       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3431     }
3432     bi[i+1] = bi[i] + ncut;
3433     nzi     = ncut + 1;
3434 
3435     /* apply level dropping rule to U part */
3436     ncut = nzi_au + dtcount;
3437     if (ncut < nzi_bu) {
3438       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3439       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3440     } else {
3441       ncut = nzi_bu;
3442     }
3443     nzi += ncut;
3444 
3445     /* mark bdiagonal */
3446     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3447     bdiag_rev[n-i-1] = bdiag[i+1];
3448     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3449     bjtmp            = bj + bdiag[i];
3450     batmp            = ba + bdiag[i];
3451     *bjtmp           = i;
3452     *batmp           = diag_tmp; /* rtmp[i]; */
3453     if (*batmp == 0.0) {
3454       *batmp = dt+shift;
3455       /* printf(" row %d add shift %g\n",i,shift); */
3456     }
3457     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3458     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3459 
3460     bjtmp = bj + bdiag[i+1]+1;
3461     batmp = ba + bdiag[i+1]+1;
3462     for (k=0; k<ncut; k++) {
3463       bjtmp[k] = jtmp[nzi_bl+1+k];
3464       batmp[k] = vtmp[nzi_bl+1+k];
3465       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3466     }
3467     /* printf("\n"); */
3468 
3469     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3470     /*
3471     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3472     printf(" ----------------------------\n");
3473     */
3474   } /* for (i=0; i<n; i++) */
3475     /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3476   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3477 
3478   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3479   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3480 
3481   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3482   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3483   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3484   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3485 
3486   ierr     = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3487   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3488 
3489   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3490   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3491   if (row_identity && icol_identity) {
3492     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3493   } else {
3494     B->ops->solve = MatSolve_SeqAIJ;
3495   }
3496 
3497   B->ops->solveadd          = 0;
3498   B->ops->solvetranspose    = 0;
3499   B->ops->solvetransposeadd = 0;
3500   B->ops->matsolve          = 0;
3501   B->assembled              = PETSC_TRUE;
3502   B->preallocated           = PETSC_TRUE;
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 /* a wraper of MatILUDTFactor_SeqAIJ() */
3507 #undef __FUNCT__
3508 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3509 /*
3510     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3511 */
3512 
3513 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3514 {
3515   PetscErrorCode ierr;
3516 
3517   PetscFunctionBegin;
3518   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 /*
3523    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3524    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3525 */
3526 #undef __FUNCT__
3527 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3528 /*
3529     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3530 */
3531 
3532 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3533 {
3534   Mat            C     =fact;
3535   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3536   IS             isrow = b->row,isicol = b->icol;
3537   PetscErrorCode ierr;
3538   const PetscInt *r,*ic,*ics;
3539   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3540   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3541   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3542   PetscReal      dt=info->dt,shift=info->shiftamount;
3543   PetscBool      row_identity, col_identity;
3544 
3545   PetscFunctionBegin;
3546   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3547   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3548   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
3549   ics  = ic;
3550 
3551   for (i=0; i<n; i++) {
3552     /* initialize rtmp array */
3553     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3554     bjtmp = bj + bi[i];
3555     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3556     rtmp[i] = 0.0;
3557     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3558     bjtmp   = bj + bdiag[i+1] + 1;
3559     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3560 
3561     /* load in initial unfactored row of A */
3562     /* printf("row %d\n",i); */
3563     nz    = ai[r[i]+1] - ai[r[i]];
3564     ajtmp = aj + ai[r[i]];
3565     v     = aa + ai[r[i]];
3566     for (j=0; j<nz; j++) {
3567       rtmp[ics[*ajtmp++]] = v[j];
3568       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3569     }
3570     /* printf("\n"); */
3571 
3572     /* numerical factorization */
3573     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3574     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3575     k     = 0;
3576     while (k < nzl) {
3577       row = *bjtmp++;
3578       /* printf("  prow %d\n",row); */
3579       pc         = rtmp + row;
3580       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3581       multiplier = (*pc) * (*pv);
3582       *pc        = multiplier;
3583       if (PetscAbsScalar(multiplier) > dt) {
3584         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3585         pv = b->a + bdiag[row+1] + 1;
3586         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3587         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3588         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3589       }
3590       k++;
3591     }
3592 
3593     /* finished row so stick it into b->a */
3594     /* L-part */
3595     pv  = b->a + bi[i];
3596     pj  = bj + bi[i];
3597     nzl = bi[i+1] - bi[i];
3598     for (j=0; j<nzl; j++) {
3599       pv[j] = rtmp[pj[j]];
3600       /* printf(" (%d,%g),",pj[j],pv[j]); */
3601     }
3602 
3603     /* diagonal: invert diagonal entries for simplier triangular solves */
3604     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3605     b->a[bdiag[i]] = 1.0/rtmp[i];
3606     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3607 
3608     /* U-part */
3609     pv  = b->a + bdiag[i+1] + 1;
3610     pj  = bj + bdiag[i+1] + 1;
3611     nzu = bdiag[i] - bdiag[i+1] - 1;
3612     for (j=0; j<nzu; j++) {
3613       pv[j] = rtmp[pj[j]];
3614       /* printf(" (%d,%g),",pj[j],pv[j]); */
3615     }
3616     /* printf("\n"); */
3617   }
3618 
3619   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3620   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3621   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3622 
3623   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3624   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3625   if (row_identity && col_identity) {
3626     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3627   } else {
3628     C->ops->solve = MatSolve_SeqAIJ;
3629   }
3630   C->ops->solveadd          = 0;
3631   C->ops->solvetranspose    = 0;
3632   C->ops->solvetransposeadd = 0;
3633   C->ops->matsolve          = 0;
3634   C->assembled              = PETSC_TRUE;
3635   C->preallocated           = PETSC_TRUE;
3636 
3637   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3638   PetscFunctionReturn(0);
3639 }
3640