xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 0a7e80ddf00b85a8bec408344d0bbd42e7f7dbd2)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
9 /*
10       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
11 
12       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13 */
14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15 {
16   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
17   PetscErrorCode    ierr;
18   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19   const PetscInt    *ai = a->i, *aj = a->j;
20   const PetscScalar *aa = a->a;
21   PetscBool         *done;
22   PetscReal         best,past = 0,future;
23 
24   PetscFunctionBegin;
25   /* pick initial row */
26   best = -1;
27   for (i=0; i<n; i++) {
28     future = 0.0;
29     for (j=ai[i]; j<ai[i+1]; j++) {
30       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31       else              past  = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best    = past/future;
36       current = i;
37     }
38   }
39 
40   ierr     = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr);
41   ierr     = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr     = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best       = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk])       past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best       = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current    = newcurrent;
84     order[i+1] = current;
85   }
86   ierr  = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr  = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr  = PetscFree(done);CHKERRQ(ierr);
90   ierr  = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc"
96 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool  *flg)
97 {
98   PetscFunctionBegin;
99   *flg = PETSC_TRUE;
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
105 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
106 {
107   PetscInt       n = A->rmap->n;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111 #if defined(PETSC_USE_COMPLEX)
112   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
113 #endif
114   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
115   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
116   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
117     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
118 
119     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
121 
122     ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
123   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
124     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
125     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
126 
127     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
128     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
129   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
130   (*B)->factortype = ftype;
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137 {
138   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
139   IS                 isicol;
140   PetscErrorCode     ierr;
141   const PetscInt     *r,*ic;
142   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
143   PetscInt           *bi,*bj,*ajtmp;
144   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
145   PetscReal          f;
146   PetscInt           nlnk,*lnk,k,**bi_ptr;
147   PetscFreeSpaceList free_space=NULL,current_space=NULL;
148   PetscBT            lnkbt;
149 
150   PetscFunctionBegin;
151   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
152   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
153   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
154   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
155 
156   /* get new row pointers */
157   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
158   bi[0] = 0;
159 
160   /* bdiag is location of diagonal in factor */
161   ierr     = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
162   bdiag[0] = 0;
163 
164   /* linked list for storing column indices of the active row */
165   nlnk = n + 1;
166   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
167 
168   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
169 
170   /* initial FreeSpace size is f*(ai[n]+1) */
171   f             = info->fill;
172   ierr          = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
173   current_space = free_space;
174 
175   for (i=0; i<n; i++) {
176     /* copy previous fill into linked list */
177     nzi = 0;
178     nnz = ai[r[i]+1] - ai[r[i]];
179     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
180     ajtmp = aj + ai[r[i]];
181     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
182     nzi  += nlnk;
183 
184     /* add pivot rows into linked list */
185     row = lnk[n];
186     while (row < i) {
187       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
188       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
189       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
190       nzi  += nlnk;
191       row   = lnk[row];
192     }
193     bi[i+1] = bi[i] + nzi;
194     im[i]   = nzi;
195 
196     /* mark bdiag */
197     nzbd = 0;
198     nnz  = nzi;
199     k    = lnk[n];
200     while (nnz-- && k < i) {
201       nzbd++;
202       k = lnk[k];
203     }
204     bdiag[i] = bi[i] + nzbd;
205 
206     /* if free space is not available, make more free space */
207     if (current_space->local_remaining<nzi) {
208       nnz  = (n - i)*nzi; /* estimated and max additional space needed */
209       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
210       reallocs++;
211     }
212 
213     /* copy data into free space, then initialize lnk */
214     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
215 
216     bi_ptr[i]                       = current_space->array;
217     current_space->array           += nzi;
218     current_space->local_used      += nzi;
219     current_space->local_remaining -= nzi;
220   }
221 #if defined(PETSC_USE_INFO)
222   if (ai[n] != 0) {
223     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
224     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
225     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
226     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
227     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
228   } else {
229     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
230   }
231 #endif
232 
233   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
234   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
235 
236   /* destroy list of free space and other temporary array(s) */
237   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
238   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
239   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
240   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
241 
242   /* put together the new matrix */
243   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
244   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
245   b    = (Mat_SeqAIJ*)(B)->data;
246 
247   b->free_a       = PETSC_TRUE;
248   b->free_ij      = PETSC_TRUE;
249   b->singlemalloc = PETSC_FALSE;
250 
251   ierr    = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
252   b->j    = bj;
253   b->i    = bi;
254   b->diag = bdiag;
255   b->ilen = 0;
256   b->imax = 0;
257   b->row  = isrow;
258   b->col  = iscol;
259   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
260   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
261   b->icol = isicol;
262   ierr    = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
263 
264   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
265   ierr     = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
266   b->maxnz = b->nz = bi[n];
267 
268   (B)->factortype            = MAT_FACTOR_LU;
269   (B)->info.factor_mallocs   = reallocs;
270   (B)->info.fill_ratio_given = f;
271 
272   if (ai[n]) {
273     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
274   } else {
275     (B)->info.fill_ratio_needed = 0.0;
276   }
277   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
278   if (a->inode.size) {
279     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
280   }
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
287 {
288   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
289   IS                 isicol;
290   PetscErrorCode     ierr;
291   const PetscInt     *r,*ic;
292   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
293   PetscInt           *bi,*bj,*ajtmp;
294   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
295   PetscReal          f;
296   PetscInt           nlnk,*lnk,k,**bi_ptr;
297   PetscFreeSpaceList free_space=NULL,current_space=NULL;
298   PetscBT            lnkbt;
299 
300   PetscFunctionBegin;
301   /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
302   /*
303   PetscBool          olddatastruct=PETSC_FALSE;
304   ierr = PetscOptionsGetBool(NULL,"-lu_old",&olddatastruct,NULL);CHKERRQ(ierr);
305   if (olddatastruct) {
306     ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr);
307     PetscFunctionReturn(0);
308   }
309   */
310   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
311   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
312   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
313   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
314 
315   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
317   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
318   bi[0] = bdiag[0] = 0;
319 
320   /* linked list for storing column indices of the active row */
321   nlnk = n + 1;
322   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
323 
324   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
325 
326   /* initial FreeSpace size is f*(ai[n]+1) */
327   f             = info->fill;
328   ierr          = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
329   current_space = free_space;
330 
331   for (i=0; i<n; i++) {
332     /* copy previous fill into linked list */
333     nzi = 0;
334     nnz = ai[r[i]+1] - ai[r[i]];
335     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
336     ajtmp = aj + ai[r[i]];
337     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
338     nzi  += nlnk;
339 
340     /* add pivot rows into linked list */
341     row = lnk[n];
342     while (row < i) {
343       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
344       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
346       nzi  += nlnk;
347       row   = lnk[row];
348     }
349     bi[i+1] = bi[i] + nzi;
350     im[i]   = nzi;
351 
352     /* mark bdiag */
353     nzbd = 0;
354     nnz  = nzi;
355     k    = lnk[n];
356     while (nnz-- && k < i) {
357       nzbd++;
358       k = lnk[k];
359     }
360     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
361 
362     /* if free space is not available, make more free space */
363     if (current_space->local_remaining<nzi) {
364       nnz  = 2*(n - i)*nzi; /* estimated and max additional space needed */
365       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
366       reallocs++;
367     }
368 
369     /* copy data into free space, then initialize lnk */
370     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
371 
372     bi_ptr[i]                       = current_space->array;
373     current_space->array           += nzi;
374     current_space->local_used      += nzi;
375     current_space->local_remaining -= nzi;
376   }
377 
378   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
379   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
380 
381   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
382   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
383   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
384   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
385   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
386 
387   /* put together the new matrix */
388   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
389   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
390   b    = (Mat_SeqAIJ*)(B)->data;
391 
392   b->free_a       = PETSC_TRUE;
393   b->free_ij      = PETSC_TRUE;
394   b->singlemalloc = PETSC_FALSE;
395 
396   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
397 
398   b->j    = bj;
399   b->i    = bi;
400   b->diag = bdiag;
401   b->ilen = 0;
402   b->imax = 0;
403   b->row  = isrow;
404   b->col  = iscol;
405   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
406   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
407   b->icol = isicol;
408   ierr    = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
409 
410   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
411   ierr     = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
412   b->maxnz = b->nz = bdiag[0]+1;
413 
414   B->factortype            = MAT_FACTOR_LU;
415   B->info.factor_mallocs   = reallocs;
416   B->info.fill_ratio_given = f;
417 
418   if (ai[n]) {
419     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
420   } else {
421     B->info.fill_ratio_needed = 0.0;
422   }
423 #if defined(PETSC_USE_INFO)
424   if (ai[n] != 0) {
425     PetscReal af = B->info.fill_ratio_needed;
426     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
427     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
428     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
429     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
430   } else {
431     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
432   }
433 #endif
434   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
435   if (a->inode.size) {
436     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
437   }
438   PetscFunctionReturn(0);
439 }
440 
441 /*
442     Trouble in factorization, should we dump the original matrix?
443 */
444 #undef __FUNCT__
445 #define __FUNCT__ "MatFactorDumpMatrix"
446 PetscErrorCode MatFactorDumpMatrix(Mat A)
447 {
448   PetscErrorCode ierr;
449   PetscBool      flg = PETSC_FALSE;
450 
451   PetscFunctionBegin;
452   ierr = PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr);
453   if (flg) {
454     PetscViewer viewer;
455     char        filename[PETSC_MAX_PATH_LEN];
456 
457     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
458     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
459     ierr = MatView(A,viewer);CHKERRQ(ierr);
460     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
461   }
462   PetscFunctionReturn(0);
463 }
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
467 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
468 {
469   Mat             C     =B;
470   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
471   IS              isrow = b->row,isicol = b->icol;
472   PetscErrorCode  ierr;
473   const PetscInt  *r,*ic,*ics;
474   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
475   PetscInt        i,j,k,nz,nzL,row,*pj;
476   const PetscInt  *ajtmp,*bjtmp;
477   MatScalar       *rtmp,*pc,multiplier,*pv;
478   const MatScalar *aa=a->a,*v;
479   PetscBool       row_identity,col_identity;
480   FactorShiftCtx  sctx;
481   const PetscInt  *ddiag;
482   PetscReal       rs;
483   MatScalar       d;
484 
485   PetscFunctionBegin;
486   /* MatPivotSetUp(): initialize shift context sctx */
487   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
488 
489   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
490     ddiag          = a->diag;
491     sctx.shift_top = info->zeropivot;
492     for (i=0; i<n; i++) {
493       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
494       d  = (aa)[ddiag[i]];
495       rs = -PetscAbsScalar(d) - PetscRealPart(d);
496       v  = aa+ai[i];
497       nz = ai[i+1] - ai[i];
498       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
499       if (rs>sctx.shift_top) sctx.shift_top = rs;
500     }
501     sctx.shift_top *= 1.1;
502     sctx.nshift_max = 5;
503     sctx.shift_lo   = 0.;
504     sctx.shift_hi   = 1.;
505   }
506 
507   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
508   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
509   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
510   ics  = ic;
511 
512   do {
513     sctx.newshift = PETSC_FALSE;
514     for (i=0; i<n; i++) {
515       /* zero rtmp */
516       /* L part */
517       nz    = bi[i+1] - bi[i];
518       bjtmp = bj + bi[i];
519       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
520 
521       /* U part */
522       nz    = bdiag[i]-bdiag[i+1];
523       bjtmp = bj + bdiag[i+1]+1;
524       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
525 
526       /* load in initial (unfactored row) */
527       nz    = ai[r[i]+1] - ai[r[i]];
528       ajtmp = aj + ai[r[i]];
529       v     = aa + ai[r[i]];
530       for (j=0; j<nz; j++) {
531         rtmp[ics[ajtmp[j]]] = v[j];
532       }
533       /* ZeropivotApply() */
534       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
535 
536       /* elimination */
537       bjtmp = bj + bi[i];
538       row   = *bjtmp++;
539       nzL   = bi[i+1] - bi[i];
540       for (k=0; k < nzL; k++) {
541         pc = rtmp + row;
542         if (*pc != 0.0) {
543           pv         = b->a + bdiag[row];
544           multiplier = *pc * (*pv);
545           *pc        = multiplier;
546 
547           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
548           pv = b->a + bdiag[row+1]+1;
549           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
550 
551           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
552           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
553         }
554         row = *bjtmp++;
555       }
556 
557       /* finished row so stick it into b->a */
558       rs = 0.0;
559       /* L part */
560       pv = b->a + bi[i];
561       pj = b->j + bi[i];
562       nz = bi[i+1] - bi[i];
563       for (j=0; j<nz; j++) {
564         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
565       }
566 
567       /* U part */
568       pv = b->a + bdiag[i+1]+1;
569       pj = b->j + bdiag[i+1]+1;
570       nz = bdiag[i] - bdiag[i+1]-1;
571       for (j=0; j<nz; j++) {
572         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
573       }
574 
575       sctx.rs = rs;
576       sctx.pv = rtmp[i];
577       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
578       if (sctx.newshift) break; /* break for-loop */
579       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
580 
581       /* Mark diagonal and invert diagonal for simplier triangular solves */
582       pv  = b->a + bdiag[i];
583       *pv = 1.0/rtmp[i];
584 
585     } /* endof for (i=0; i<n; i++) { */
586 
587     /* MatPivotRefine() */
588     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
589       /*
590        * if no shift in this attempt & shifting & started shifting & can refine,
591        * then try lower shift
592        */
593       sctx.shift_hi       = sctx.shift_fraction;
594       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
595       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
596       sctx.newshift       = PETSC_TRUE;
597       sctx.nshift++;
598     }
599   } while (sctx.newshift);
600 
601   ierr = PetscFree(rtmp);CHKERRQ(ierr);
602   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
603   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
604 
605   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
606   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
607   if (row_identity && col_identity) {
608     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
609   } else {
610     C->ops->solve = MatSolve_SeqAIJ;
611   }
612   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
613   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
614   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
615   C->ops->matsolve          = MatMatSolve_SeqAIJ;
616   C->assembled              = PETSC_TRUE;
617   C->preallocated           = PETSC_TRUE;
618 
619   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
620 
621   /* MatShiftView(A,info,&sctx) */
622   if (sctx.nshift) {
623     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
624       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
625     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
626       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
627     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
628       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
629     }
630   }
631   ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr);
632   PetscFunctionReturn(0);
633 }
634 
635 #undef __FUNCT__
636 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
637 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
638 {
639   Mat             C     =B;
640   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
641   IS              isrow = b->row,isicol = b->icol;
642   PetscErrorCode  ierr;
643   const PetscInt  *r,*ic,*ics;
644   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
645   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
646   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
647   MatScalar       *pv,*rtmp,*pc,multiplier,d;
648   const MatScalar *v,*aa=a->a;
649   PetscReal       rs=0.0;
650   FactorShiftCtx  sctx;
651   const PetscInt  *ddiag;
652   PetscBool       row_identity, col_identity;
653 
654   PetscFunctionBegin;
655   /* MatPivotSetUp(): initialize shift context sctx */
656   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
657 
658   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
659     ddiag          = a->diag;
660     sctx.shift_top = info->zeropivot;
661     for (i=0; i<n; i++) {
662       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
663       d  = (aa)[ddiag[i]];
664       rs = -PetscAbsScalar(d) - PetscRealPart(d);
665       v  = aa+ai[i];
666       nz = ai[i+1] - ai[i];
667       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
668       if (rs>sctx.shift_top) sctx.shift_top = rs;
669     }
670     sctx.shift_top *= 1.1;
671     sctx.nshift_max = 5;
672     sctx.shift_lo   = 0.;
673     sctx.shift_hi   = 1.;
674   }
675 
676   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
677   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
678   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
679   ics  = ic;
680 
681   do {
682     sctx.newshift = PETSC_FALSE;
683     for (i=0; i<n; i++) {
684       nz    = bi[i+1] - bi[i];
685       bjtmp = bj + bi[i];
686       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
687 
688       /* load in initial (unfactored row) */
689       nz    = ai[r[i]+1] - ai[r[i]];
690       ajtmp = aj + ai[r[i]];
691       v     = aa + ai[r[i]];
692       for (j=0; j<nz; j++) {
693         rtmp[ics[ajtmp[j]]] = v[j];
694       }
695       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
696 
697       row = *bjtmp++;
698       while  (row < i) {
699         pc = rtmp + row;
700         if (*pc != 0.0) {
701           pv         = b->a + diag_offset[row];
702           pj         = b->j + diag_offset[row] + 1;
703           multiplier = *pc / *pv++;
704           *pc        = multiplier;
705           nz         = bi[row+1] - diag_offset[row] - 1;
706           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
707           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
708         }
709         row = *bjtmp++;
710       }
711       /* finished row so stick it into b->a */
712       pv   = b->a + bi[i];
713       pj   = b->j + bi[i];
714       nz   = bi[i+1] - bi[i];
715       diag = diag_offset[i] - bi[i];
716       rs   = 0.0;
717       for (j=0; j<nz; j++) {
718         pv[j] = rtmp[pj[j]];
719         rs   += PetscAbsScalar(pv[j]);
720       }
721       rs -= PetscAbsScalar(pv[diag]);
722 
723       sctx.rs = rs;
724       sctx.pv = pv[diag];
725       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
726       if (sctx.newshift) break;
727       pv[diag] = sctx.pv;
728     }
729 
730     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
731       /*
732        * if no shift in this attempt & shifting & started shifting & can refine,
733        * then try lower shift
734        */
735       sctx.shift_hi       = sctx.shift_fraction;
736       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
737       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
738       sctx.newshift       = PETSC_TRUE;
739       sctx.nshift++;
740     }
741   } while (sctx.newshift);
742 
743   /* invert diagonal entries for simplier triangular solves */
744   for (i=0; i<n; i++) {
745     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
746   }
747   ierr = PetscFree(rtmp);CHKERRQ(ierr);
748   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
749   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
750 
751   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
752   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
753   if (row_identity && col_identity) {
754     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
755   } else {
756     C->ops->solve = MatSolve_SeqAIJ_inplace;
757   }
758   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
759   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
760   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
761   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
762 
763   C->assembled    = PETSC_TRUE;
764   C->preallocated = PETSC_TRUE;
765 
766   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
767   if (sctx.nshift) {
768     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
769       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
770     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
771       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
772     }
773   }
774   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
775   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
776 
777   ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 /*
782    This routine implements inplace ILU(0) with row or/and column permutations.
783    Input:
784      A - original matrix
785    Output;
786      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
787          a->j (col index) is permuted by the inverse of colperm, then sorted
788          a->a reordered accordingly with a->j
789          a->diag (ptr to diagonal elements) is updated.
790 */
791 #undef __FUNCT__
792 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
793 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
794 {
795   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
796   IS              isrow = a->row,isicol = a->icol;
797   PetscErrorCode  ierr;
798   const PetscInt  *r,*ic,*ics;
799   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
800   PetscInt        *ajtmp,nz,row;
801   PetscInt        *diag = a->diag,nbdiag,*pj;
802   PetscScalar     *rtmp,*pc,multiplier,d;
803   MatScalar       *pv,*v;
804   PetscReal       rs;
805   FactorShiftCtx  sctx;
806   const MatScalar *aa=a->a,*vtmp;
807 
808   PetscFunctionBegin;
809   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
810 
811   /* MatPivotSetUp(): initialize shift context sctx */
812   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
813 
814   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
815     const PetscInt *ddiag = a->diag;
816     sctx.shift_top = info->zeropivot;
817     for (i=0; i<n; i++) {
818       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
819       d    = (aa)[ddiag[i]];
820       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
821       vtmp = aa+ai[i];
822       nz   = ai[i+1] - ai[i];
823       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
824       if (rs>sctx.shift_top) sctx.shift_top = rs;
825     }
826     sctx.shift_top *= 1.1;
827     sctx.nshift_max = 5;
828     sctx.shift_lo   = 0.;
829     sctx.shift_hi   = 1.;
830   }
831 
832   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
833   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
834   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr);
835   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
836   ics  = ic;
837 
838 #if defined(MV)
839   sctx.shift_top      = 0.;
840   sctx.nshift_max     = 0;
841   sctx.shift_lo       = 0.;
842   sctx.shift_hi       = 0.;
843   sctx.shift_fraction = 0.;
844 
845   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
846     sctx.shift_top = 0.;
847     for (i=0; i<n; i++) {
848       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
849       d  = (a->a)[diag[i]];
850       rs = -PetscAbsScalar(d) - PetscRealPart(d);
851       v  = a->a+ai[i];
852       nz = ai[i+1] - ai[i];
853       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
854       if (rs>sctx.shift_top) sctx.shift_top = rs;
855     }
856     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
857     sctx.shift_top *= 1.1;
858     sctx.nshift_max = 5;
859     sctx.shift_lo   = 0.;
860     sctx.shift_hi   = 1.;
861   }
862 
863   sctx.shift_amount = 0.;
864   sctx.nshift       = 0;
865 #endif
866 
867   do {
868     sctx.newshift = PETSC_FALSE;
869     for (i=0; i<n; i++) {
870       /* load in initial unfactored row */
871       nz    = ai[r[i]+1] - ai[r[i]];
872       ajtmp = aj + ai[r[i]];
873       v     = a->a + ai[r[i]];
874       /* sort permuted ajtmp and values v accordingly */
875       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
876       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
877 
878       diag[r[i]] = ai[r[i]];
879       for (j=0; j<nz; j++) {
880         rtmp[ajtmp[j]] = v[j];
881         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
882       }
883       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
884 
885       row = *ajtmp++;
886       while  (row < i) {
887         pc = rtmp + row;
888         if (*pc != 0.0) {
889           pv = a->a + diag[r[row]];
890           pj = aj + diag[r[row]] + 1;
891 
892           multiplier = *pc / *pv++;
893           *pc        = multiplier;
894           nz         = ai[r[row]+1] - diag[r[row]] - 1;
895           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
896           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
897         }
898         row = *ajtmp++;
899       }
900       /* finished row so overwrite it onto a->a */
901       pv     = a->a + ai[r[i]];
902       pj     = aj + ai[r[i]];
903       nz     = ai[r[i]+1] - ai[r[i]];
904       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
905 
906       rs = 0.0;
907       for (j=0; j<nz; j++) {
908         pv[j] = rtmp[pj[j]];
909         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
910       }
911 
912       sctx.rs = rs;
913       sctx.pv = pv[nbdiag];
914       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
915       if (sctx.newshift) break;
916       pv[nbdiag] = sctx.pv;
917     }
918 
919     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
920       /*
921        * if no shift in this attempt & shifting & started shifting & can refine,
922        * then try lower shift
923        */
924       sctx.shift_hi       = sctx.shift_fraction;
925       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
926       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
927       sctx.newshift       = PETSC_TRUE;
928       sctx.nshift++;
929     }
930   } while (sctx.newshift);
931 
932   /* invert diagonal entries for simplier triangular solves */
933   for (i=0; i<n; i++) {
934     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
935   }
936 
937   ierr = PetscFree(rtmp);CHKERRQ(ierr);
938   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
939   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
940 
941   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
942   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
943   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
944   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
945 
946   A->assembled    = PETSC_TRUE;
947   A->preallocated = PETSC_TRUE;
948 
949   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
950   if (sctx.nshift) {
951     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
952       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
953     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
954       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
955     }
956   }
957   PetscFunctionReturn(0);
958 }
959 
960 /* ----------------------------------------------------------- */
961 #undef __FUNCT__
962 #define __FUNCT__ "MatLUFactor_SeqAIJ"
963 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
964 {
965   PetscErrorCode ierr;
966   Mat            C;
967 
968   PetscFunctionBegin;
969   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
970   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
971   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
972 
973   A->ops->solve          = C->ops->solve;
974   A->ops->solvetranspose = C->ops->solvetranspose;
975 
976   ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
977   ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 /* ----------------------------------------------------------- */
981 
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
985 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
986 {
987   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
988   IS                iscol = a->col,isrow = a->row;
989   PetscErrorCode    ierr;
990   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
991   PetscInt          nz;
992   const PetscInt    *rout,*cout,*r,*c;
993   PetscScalar       *x,*tmp,*tmps,sum;
994   const PetscScalar *b;
995   const MatScalar   *aa = a->a,*v;
996 
997   PetscFunctionBegin;
998   if (!n) PetscFunctionReturn(0);
999 
1000   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1001   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1002   tmp  = a->solve_work;
1003 
1004   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1005   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1006 
1007   /* forward solve the lower triangular */
1008   tmp[0] = b[*r++];
1009   tmps   = tmp;
1010   for (i=1; i<n; i++) {
1011     v   = aa + ai[i];
1012     vi  = aj + ai[i];
1013     nz  = a->diag[i] - ai[i];
1014     sum = b[*r++];
1015     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1016     tmp[i] = sum;
1017   }
1018 
1019   /* backward solve the upper triangular */
1020   for (i=n-1; i>=0; i--) {
1021     v   = aa + a->diag[i] + 1;
1022     vi  = aj + a->diag[i] + 1;
1023     nz  = ai[i+1] - a->diag[i] - 1;
1024     sum = tmp[i];
1025     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1026     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1027   }
1028 
1029   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1030   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1031   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1032   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1033   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1039 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1040 {
1041   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1042   IS              iscol = a->col,isrow = a->row;
1043   PetscErrorCode  ierr;
1044   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1045   PetscInt        nz,neq;
1046   const PetscInt  *rout,*cout,*r,*c;
1047   PetscScalar     *x,*b,*tmp,*tmps,sum;
1048   const MatScalar *aa = a->a,*v;
1049   PetscBool       bisdense,xisdense;
1050 
1051   PetscFunctionBegin;
1052   if (!n) PetscFunctionReturn(0);
1053 
1054   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1055   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1056   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1057   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1058 
1059   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1060   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1061 
1062   tmp  = a->solve_work;
1063   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1064   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1065 
1066   for (neq=0; neq<B->cmap->n; neq++) {
1067     /* forward solve the lower triangular */
1068     tmp[0] = b[r[0]];
1069     tmps   = tmp;
1070     for (i=1; i<n; i++) {
1071       v   = aa + ai[i];
1072       vi  = aj + ai[i];
1073       nz  = a->diag[i] - ai[i];
1074       sum = b[r[i]];
1075       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1076       tmp[i] = sum;
1077     }
1078     /* backward solve the upper triangular */
1079     for (i=n-1; i>=0; i--) {
1080       v   = aa + a->diag[i] + 1;
1081       vi  = aj + a->diag[i] + 1;
1082       nz  = ai[i+1] - a->diag[i] - 1;
1083       sum = tmp[i];
1084       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1085       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1086     }
1087 
1088     b += n;
1089     x += n;
1090   }
1091   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1092   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1093   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1094   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1095   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1101 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1102 {
1103   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1104   IS              iscol = a->col,isrow = a->row;
1105   PetscErrorCode  ierr;
1106   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1107   PetscInt        nz,neq;
1108   const PetscInt  *rout,*cout,*r,*c;
1109   PetscScalar     *x,*b,*tmp,sum;
1110   const MatScalar *aa = a->a,*v;
1111   PetscBool       bisdense,xisdense;
1112 
1113   PetscFunctionBegin;
1114   if (!n) PetscFunctionReturn(0);
1115 
1116   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1117   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1118   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1119   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1120 
1121   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1122   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1123 
1124   tmp  = a->solve_work;
1125   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1126   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1127 
1128   for (neq=0; neq<B->cmap->n; neq++) {
1129     /* forward solve the lower triangular */
1130     tmp[0] = b[r[0]];
1131     v      = aa;
1132     vi     = aj;
1133     for (i=1; i<n; i++) {
1134       nz  = ai[i+1] - ai[i];
1135       sum = b[r[i]];
1136       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1137       tmp[i] = sum;
1138       v     += nz; vi += nz;
1139     }
1140 
1141     /* backward solve the upper triangular */
1142     for (i=n-1; i>=0; i--) {
1143       v   = aa + adiag[i+1]+1;
1144       vi  = aj + adiag[i+1]+1;
1145       nz  = adiag[i]-adiag[i+1]-1;
1146       sum = tmp[i];
1147       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1148       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1149     }
1150 
1151     b += n;
1152     x += n;
1153   }
1154   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1155   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1156   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1157   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1158   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1164 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1165 {
1166   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1167   IS              iscol = a->col,isrow = a->row;
1168   PetscErrorCode  ierr;
1169   const PetscInt  *r,*c,*rout,*cout;
1170   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1171   PetscInt        nz,row;
1172   PetscScalar     *x,*b,*tmp,*tmps,sum;
1173   const MatScalar *aa = a->a,*v;
1174 
1175   PetscFunctionBegin;
1176   if (!n) PetscFunctionReturn(0);
1177 
1178   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1179   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1180   tmp  = a->solve_work;
1181 
1182   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1183   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1184 
1185   /* forward solve the lower triangular */
1186   tmp[0] = b[*r++];
1187   tmps   = tmp;
1188   for (row=1; row<n; row++) {
1189     i   = rout[row]; /* permuted row */
1190     v   = aa + ai[i];
1191     vi  = aj + ai[i];
1192     nz  = a->diag[i] - ai[i];
1193     sum = b[*r++];
1194     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1195     tmp[row] = sum;
1196   }
1197 
1198   /* backward solve the upper triangular */
1199   for (row=n-1; row>=0; row--) {
1200     i   = rout[row]; /* permuted row */
1201     v   = aa + a->diag[i] + 1;
1202     vi  = aj + a->diag[i] + 1;
1203     nz  = ai[i+1] - a->diag[i] - 1;
1204     sum = tmp[row];
1205     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1206     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1207   }
1208 
1209   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1210   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1211   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1212   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1213   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /* ----------------------------------------------------------- */
1218 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1221 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1222 {
1223   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1224   PetscErrorCode    ierr;
1225   PetscInt          n   = A->rmap->n;
1226   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1227   PetscScalar       *x;
1228   const PetscScalar *b;
1229   const MatScalar   *aa = a->a;
1230 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1231   PetscInt        adiag_i,i,nz,ai_i;
1232   const PetscInt  *vi;
1233   const MatScalar *v;
1234   PetscScalar     sum;
1235 #endif
1236 
1237   PetscFunctionBegin;
1238   if (!n) PetscFunctionReturn(0);
1239 
1240   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1242 
1243 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1244   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1245 #else
1246   /* forward solve the lower triangular */
1247   x[0] = b[0];
1248   for (i=1; i<n; i++) {
1249     ai_i = ai[i];
1250     v    = aa + ai_i;
1251     vi   = aj + ai_i;
1252     nz   = adiag[i] - ai_i;
1253     sum  = b[i];
1254     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1255     x[i] = sum;
1256   }
1257 
1258   /* backward solve the upper triangular */
1259   for (i=n-1; i>=0; i--) {
1260     adiag_i = adiag[i];
1261     v       = aa + adiag_i + 1;
1262     vi      = aj + adiag_i + 1;
1263     nz      = ai[i+1] - adiag_i - 1;
1264     sum     = x[i];
1265     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1266     x[i] = sum*aa[adiag_i];
1267   }
1268 #endif
1269   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1270   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1271   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1277 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1278 {
1279   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1280   IS                iscol = a->col,isrow = a->row;
1281   PetscErrorCode    ierr;
1282   PetscInt          i, n = A->rmap->n,j;
1283   PetscInt          nz;
1284   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1285   PetscScalar       *x,*tmp,sum;
1286   const PetscScalar *b;
1287   const MatScalar   *aa = a->a,*v;
1288 
1289   PetscFunctionBegin;
1290   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1291 
1292   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1293   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1294   tmp  = a->solve_work;
1295 
1296   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1297   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1298 
1299   /* forward solve the lower triangular */
1300   tmp[0] = b[*r++];
1301   for (i=1; i<n; i++) {
1302     v   = aa + ai[i];
1303     vi  = aj + ai[i];
1304     nz  = a->diag[i] - ai[i];
1305     sum = b[*r++];
1306     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1307     tmp[i] = sum;
1308   }
1309 
1310   /* backward solve the upper triangular */
1311   for (i=n-1; i>=0; i--) {
1312     v   = aa + a->diag[i] + 1;
1313     vi  = aj + a->diag[i] + 1;
1314     nz  = ai[i+1] - a->diag[i] - 1;
1315     sum = tmp[i];
1316     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1317     tmp[i]   = sum*aa[a->diag[i]];
1318     x[*c--] += tmp[i];
1319   }
1320 
1321   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1322   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1323   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1324   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1325   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1331 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1332 {
1333   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1334   IS                iscol = a->col,isrow = a->row;
1335   PetscErrorCode    ierr;
1336   PetscInt          i, n = A->rmap->n,j;
1337   PetscInt          nz;
1338   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1339   PetscScalar       *x,*tmp,sum;
1340   const PetscScalar *b;
1341   const MatScalar   *aa = a->a,*v;
1342 
1343   PetscFunctionBegin;
1344   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1345 
1346   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1348   tmp  = a->solve_work;
1349 
1350   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1351   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1352 
1353   /* forward solve the lower triangular */
1354   tmp[0] = b[r[0]];
1355   v      = aa;
1356   vi     = aj;
1357   for (i=1; i<n; i++) {
1358     nz  = ai[i+1] - ai[i];
1359     sum = b[r[i]];
1360     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1361     tmp[i] = sum;
1362     v     += nz;
1363     vi    += nz;
1364   }
1365 
1366   /* backward solve the upper triangular */
1367   v  = aa + adiag[n-1];
1368   vi = aj + adiag[n-1];
1369   for (i=n-1; i>=0; i--) {
1370     nz  = adiag[i] - adiag[i+1] - 1;
1371     sum = tmp[i];
1372     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1373     tmp[i]   = sum*v[nz];
1374     x[c[i]] += tmp[i];
1375     v       += nz+1; vi += nz+1;
1376   }
1377 
1378   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1379   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1380   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1381   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1382   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1388 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1389 {
1390   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1391   IS                iscol = a->col,isrow = a->row;
1392   PetscErrorCode    ierr;
1393   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1394   PetscInt          i,n = A->rmap->n,j;
1395   PetscInt          nz;
1396   PetscScalar       *x,*tmp,s1;
1397   const MatScalar   *aa = a->a,*v;
1398   const PetscScalar *b;
1399 
1400   PetscFunctionBegin;
1401   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1402   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1403   tmp  = a->solve_work;
1404 
1405   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1406   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1407 
1408   /* copy the b into temp work space according to permutation */
1409   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1410 
1411   /* forward solve the U^T */
1412   for (i=0; i<n; i++) {
1413     v   = aa + diag[i];
1414     vi  = aj + diag[i] + 1;
1415     nz  = ai[i+1] - diag[i] - 1;
1416     s1  = tmp[i];
1417     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1418     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1419     tmp[i] = s1;
1420   }
1421 
1422   /* backward solve the L^T */
1423   for (i=n-1; i>=0; i--) {
1424     v  = aa + diag[i] - 1;
1425     vi = aj + diag[i] - 1;
1426     nz = diag[i] - ai[i];
1427     s1 = tmp[i];
1428     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1429   }
1430 
1431   /* copy tmp into x according to permutation */
1432   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1433 
1434   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1435   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1436   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1437   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1438 
1439   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1445 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1446 {
1447   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1448   IS                iscol = a->col,isrow = a->row;
1449   PetscErrorCode    ierr;
1450   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1451   PetscInt          i,n = A->rmap->n,j;
1452   PetscInt          nz;
1453   PetscScalar       *x,*tmp,s1;
1454   const MatScalar   *aa = a->a,*v;
1455   const PetscScalar *b;
1456 
1457   PetscFunctionBegin;
1458   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1459   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1460   tmp  = a->solve_work;
1461 
1462   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1463   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1464 
1465   /* copy the b into temp work space according to permutation */
1466   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1467 
1468   /* forward solve the U^T */
1469   for (i=0; i<n; i++) {
1470     v   = aa + adiag[i+1] + 1;
1471     vi  = aj + adiag[i+1] + 1;
1472     nz  = adiag[i] - adiag[i+1] - 1;
1473     s1  = tmp[i];
1474     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1475     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1476     tmp[i] = s1;
1477   }
1478 
1479   /* backward solve the L^T */
1480   for (i=n-1; i>=0; i--) {
1481     v  = aa + ai[i];
1482     vi = aj + ai[i];
1483     nz = ai[i+1] - ai[i];
1484     s1 = tmp[i];
1485     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1486   }
1487 
1488   /* copy tmp into x according to permutation */
1489   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1490 
1491   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1492   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1493   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1494   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1495 
1496   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1502 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1503 {
1504   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1505   IS                iscol = a->col,isrow = a->row;
1506   PetscErrorCode    ierr;
1507   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1508   PetscInt          i,n = A->rmap->n,j;
1509   PetscInt          nz;
1510   PetscScalar       *x,*tmp,s1;
1511   const MatScalar   *aa = a->a,*v;
1512   const PetscScalar *b;
1513 
1514   PetscFunctionBegin;
1515   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1516   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1517   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1518   tmp  = a->solve_work;
1519 
1520   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1521   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1522 
1523   /* copy the b into temp work space according to permutation */
1524   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1525 
1526   /* forward solve the U^T */
1527   for (i=0; i<n; i++) {
1528     v   = aa + diag[i];
1529     vi  = aj + diag[i] + 1;
1530     nz  = ai[i+1] - diag[i] - 1;
1531     s1  = tmp[i];
1532     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1533     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1534     tmp[i] = s1;
1535   }
1536 
1537   /* backward solve the L^T */
1538   for (i=n-1; i>=0; i--) {
1539     v  = aa + diag[i] - 1;
1540     vi = aj + diag[i] - 1;
1541     nz = diag[i] - ai[i];
1542     s1 = tmp[i];
1543     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1544   }
1545 
1546   /* copy tmp into x according to permutation */
1547   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1548 
1549   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1550   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1551   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1552   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1553 
1554   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1560 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1561 {
1562   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1563   IS                iscol = a->col,isrow = a->row;
1564   PetscErrorCode    ierr;
1565   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1566   PetscInt          i,n = A->rmap->n,j;
1567   PetscInt          nz;
1568   PetscScalar       *x,*tmp,s1;
1569   const MatScalar   *aa = a->a,*v;
1570   const PetscScalar *b;
1571 
1572   PetscFunctionBegin;
1573   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1574   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1575   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1576   tmp  = a->solve_work;
1577 
1578   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1579   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1580 
1581   /* copy the b into temp work space according to permutation */
1582   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1583 
1584   /* forward solve the U^T */
1585   for (i=0; i<n; i++) {
1586     v   = aa + adiag[i+1] + 1;
1587     vi  = aj + adiag[i+1] + 1;
1588     nz  = adiag[i] - adiag[i+1] - 1;
1589     s1  = tmp[i];
1590     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1591     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1592     tmp[i] = s1;
1593   }
1594 
1595 
1596   /* backward solve the L^T */
1597   for (i=n-1; i>=0; i--) {
1598     v  = aa + ai[i];
1599     vi = aj + ai[i];
1600     nz = ai[i+1] - ai[i];
1601     s1 = tmp[i];
1602     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1603   }
1604 
1605   /* copy tmp into x according to permutation */
1606   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1607 
1608   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1609   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1610   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1611   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1612 
1613   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 /* ----------------------------------------------------------------*/
1618 
1619 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1620 
1621 /*
1622    ilu() under revised new data structure.
1623    Factored arrays bj and ba are stored as
1624      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1625 
1626    bi=fact->i is an array of size n+1, in which
1627    bi+
1628      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1629      bi[n]:  points to L(n-1,n-1)+1
1630 
1631   bdiag=fact->diag is an array of size n+1,in which
1632      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1633      bdiag[n]: points to entry of U(n-1,0)-1
1634 
1635    U(i,:) contains bdiag[i] as its last entry, i.e.,
1636     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1637 */
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1640 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1641 {
1642 
1643   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1644   PetscErrorCode ierr;
1645   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1646   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1647   PetscBool      missing;
1648   IS             isicol;
1649 
1650   PetscFunctionBegin;
1651   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1652   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1653   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1654   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1655   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1656   b    = (Mat_SeqAIJ*)(fact)->data;
1657 
1658   /* allocate matrix arrays for new data structure */
1659   ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr);
1660   ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1661 
1662   b->singlemalloc = PETSC_TRUE;
1663   if (!b->diag) {
1664     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr);
1665     ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1666   }
1667   bdiag = b->diag;
1668 
1669   if (n > 0) {
1670     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1671   }
1672 
1673   /* set bi and bj with new data structure */
1674   bi = b->i;
1675   bj = b->j;
1676 
1677   /* L part */
1678   bi[0] = 0;
1679   for (i=0; i<n; i++) {
1680     nz      = adiag[i] - ai[i];
1681     bi[i+1] = bi[i] + nz;
1682     aj      = a->j + ai[i];
1683     for (j=0; j<nz; j++) {
1684       /*   *bj = aj[j]; bj++; */
1685       bj[k++] = aj[j];
1686     }
1687   }
1688 
1689   /* U part */
1690   bdiag[n] = bi[n]-1;
1691   for (i=n-1; i>=0; i--) {
1692     nz = ai[i+1] - adiag[i] - 1;
1693     aj = a->j + adiag[i] + 1;
1694     for (j=0; j<nz; j++) {
1695       /*      *bj = aj[j]; bj++; */
1696       bj[k++] = aj[j];
1697     }
1698     /* diag[i] */
1699     /*    *bj = i; bj++; */
1700     bj[k++]  = i;
1701     bdiag[i] = bdiag[i+1] + nz + 1;
1702   }
1703 
1704   fact->factortype             = MAT_FACTOR_ILU;
1705   fact->info.factor_mallocs    = 0;
1706   fact->info.fill_ratio_given  = info->fill;
1707   fact->info.fill_ratio_needed = 1.0;
1708   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1709 
1710   b       = (Mat_SeqAIJ*)(fact)->data;
1711   b->row  = isrow;
1712   b->col  = iscol;
1713   b->icol = isicol;
1714   ierr    = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1715   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1716   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1722 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1723 {
1724   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1725   IS                 isicol;
1726   PetscErrorCode     ierr;
1727   const PetscInt     *r,*ic;
1728   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1729   PetscInt           *bi,*cols,nnz,*cols_lvl;
1730   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1731   PetscInt           i,levels,diagonal_fill;
1732   PetscBool          col_identity,row_identity;
1733   PetscReal          f;
1734   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1735   PetscBT            lnkbt;
1736   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1737   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1738   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1739 
1740   PetscFunctionBegin;
1741   /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1742   /*
1743   PetscBool          olddatastruct=PETSC_FALSE;
1744   ierr = PetscOptionsGetBool(NULL,"-ilu_old",&olddatastruct,NULL);CHKERRQ(ierr);
1745   if (olddatastruct) {
1746     ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1747     PetscFunctionReturn(0);
1748   }
1749   */
1750 
1751   levels = (PetscInt)info->levels;
1752   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1753   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1754   if (!levels && row_identity && col_identity) {
1755     /* special case: ilu(0) with natural ordering */
1756     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1757     if (a->inode.size) {
1758       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1759     }
1760     PetscFunctionReturn(0);
1761   }
1762 
1763   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1764   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1765   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1766   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1767 
1768   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1769   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1770   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1771   bi[0] = bdiag[0] = 0;
1772   ierr  = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1773 
1774   /* create a linked list for storing column indices of the active row */
1775   nlnk = n + 1;
1776   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1777 
1778   /* initial FreeSpace size is f*(ai[n]+1) */
1779   f                 = info->fill;
1780   diagonal_fill     = (PetscInt)info->diagonal_fill;
1781   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1782   current_space     = free_space;
1783   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1784   current_space_lvl = free_space_lvl;
1785   for (i=0; i<n; i++) {
1786     nzi = 0;
1787     /* copy current row into linked list */
1788     nnz = ai[r[i]+1] - ai[r[i]];
1789     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1790     cols   = aj + ai[r[i]];
1791     lnk[i] = -1; /* marker to indicate if diagonal exists */
1792     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1793     nzi   += nlnk;
1794 
1795     /* make sure diagonal entry is included */
1796     if (diagonal_fill && lnk[i] == -1) {
1797       fm = n;
1798       while (lnk[fm] < i) fm = lnk[fm];
1799       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1800       lnk[fm]    = i;
1801       lnk_lvl[i] = 0;
1802       nzi++; dcount++;
1803     }
1804 
1805     /* add pivot rows into the active row */
1806     nzbd = 0;
1807     prow = lnk[n];
1808     while (prow < i) {
1809       nnz      = bdiag[prow];
1810       cols     = bj_ptr[prow] + nnz + 1;
1811       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1812       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1813       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1814       nzi     += nlnk;
1815       prow     = lnk[prow];
1816       nzbd++;
1817     }
1818     bdiag[i] = nzbd;
1819     bi[i+1]  = bi[i] + nzi;
1820     /* if free space is not available, make more free space */
1821     if (current_space->local_remaining<nzi) {
1822       nnz  = 2*nzi*(n - i); /* estimated and max additional space needed */
1823       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1824       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1825       reallocs++;
1826     }
1827 
1828     /* copy data into free_space and free_space_lvl, then initialize lnk */
1829     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1830     bj_ptr[i]    = current_space->array;
1831     bjlvl_ptr[i] = current_space_lvl->array;
1832 
1833     /* make sure the active row i has diagonal entry */
1834     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1835 
1836     current_space->array               += nzi;
1837     current_space->local_used          += nzi;
1838     current_space->local_remaining     -= nzi;
1839     current_space_lvl->array           += nzi;
1840     current_space_lvl->local_used      += nzi;
1841     current_space_lvl->local_remaining -= nzi;
1842   }
1843 
1844   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1845   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1846   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1847   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
1848   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1849 
1850   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1851   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1852   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1853 
1854 #if defined(PETSC_USE_INFO)
1855   {
1856     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1857     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1858     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1859     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
1860     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1861     if (diagonal_fill) {
1862       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
1863     }
1864   }
1865 #endif
1866   /* put together the new matrix */
1867   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
1868   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
1869   b    = (Mat_SeqAIJ*)(fact)->data;
1870 
1871   b->free_a       = PETSC_TRUE;
1872   b->free_ij      = PETSC_TRUE;
1873   b->singlemalloc = PETSC_FALSE;
1874 
1875   ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
1876 
1877   b->j    = bj;
1878   b->i    = bi;
1879   b->diag = bdiag;
1880   b->ilen = 0;
1881   b->imax = 0;
1882   b->row  = isrow;
1883   b->col  = iscol;
1884   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1885   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1886   b->icol = isicol;
1887 
1888   ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1889   /* In b structure:  Free imax, ilen, old a, old j.
1890      Allocate bdiag, solve_work, new a, new j */
1891   ierr     = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1892   b->maxnz = b->nz = bdiag[0]+1;
1893 
1894   (fact)->info.factor_mallocs    = reallocs;
1895   (fact)->info.fill_ratio_given  = f;
1896   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1897   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1898   if (a->inode.size) {
1899     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1906 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1907 {
1908   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1909   IS                 isicol;
1910   PetscErrorCode     ierr;
1911   const PetscInt     *r,*ic;
1912   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1913   PetscInt           *bi,*cols,nnz,*cols_lvl;
1914   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1915   PetscInt           i,levels,diagonal_fill;
1916   PetscBool          col_identity,row_identity;
1917   PetscReal          f;
1918   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1919   PetscBT            lnkbt;
1920   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1921   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1922   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1923   PetscBool          missing;
1924 
1925   PetscFunctionBegin;
1926   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1927   f             = info->fill;
1928   levels        = (PetscInt)info->levels;
1929   diagonal_fill = (PetscInt)info->diagonal_fill;
1930 
1931   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1932 
1933   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1934   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1935   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1936     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1937 
1938     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1939     if (a->inode.size) {
1940       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1941     }
1942     fact->factortype               = MAT_FACTOR_ILU;
1943     (fact)->info.factor_mallocs    = 0;
1944     (fact)->info.fill_ratio_given  = info->fill;
1945     (fact)->info.fill_ratio_needed = 1.0;
1946 
1947     b    = (Mat_SeqAIJ*)(fact)->data;
1948     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
1949     if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1950     b->row  = isrow;
1951     b->col  = iscol;
1952     b->icol = isicol;
1953     ierr    = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1954     ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1955     ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1956     PetscFunctionReturn(0);
1957   }
1958 
1959   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1960   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1961 
1962   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1963   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
1964   ierr  = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
1965   bi[0] = bdiag[0] = 0;
1966 
1967   ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr);
1968 
1969   /* create a linked list for storing column indices of the active row */
1970   nlnk = n + 1;
1971   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1972 
1973   /* initial FreeSpace size is f*(ai[n]+1) */
1974   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
1975   current_space     = free_space;
1976   ierr              = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr);
1977   current_space_lvl = free_space_lvl;
1978 
1979   for (i=0; i<n; i++) {
1980     nzi = 0;
1981     /* copy current row into linked list */
1982     nnz = ai[r[i]+1] - ai[r[i]];
1983     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1984     cols   = aj + ai[r[i]];
1985     lnk[i] = -1; /* marker to indicate if diagonal exists */
1986     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1987     nzi   += nlnk;
1988 
1989     /* make sure diagonal entry is included */
1990     if (diagonal_fill && lnk[i] == -1) {
1991       fm = n;
1992       while (lnk[fm] < i) fm = lnk[fm];
1993       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1994       lnk[fm]    = i;
1995       lnk_lvl[i] = 0;
1996       nzi++; dcount++;
1997     }
1998 
1999     /* add pivot rows into the active row */
2000     nzbd = 0;
2001     prow = lnk[n];
2002     while (prow < i) {
2003       nnz      = bdiag[prow];
2004       cols     = bj_ptr[prow] + nnz + 1;
2005       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2006       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
2007       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
2008       nzi     += nlnk;
2009       prow     = lnk[prow];
2010       nzbd++;
2011     }
2012     bdiag[i] = nzbd;
2013     bi[i+1]  = bi[i] + nzi;
2014 
2015     /* if free space is not available, make more free space */
2016     if (current_space->local_remaining<nzi) {
2017       nnz  = nzi*(n - i); /* estimated and max additional space needed */
2018       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2019       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2020       reallocs++;
2021     }
2022 
2023     /* copy data into free_space and free_space_lvl, then initialize lnk */
2024     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2025     bj_ptr[i]    = current_space->array;
2026     bjlvl_ptr[i] = current_space_lvl->array;
2027 
2028     /* make sure the active row i has diagonal entry */
2029     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2030 
2031     current_space->array               += nzi;
2032     current_space->local_used          += nzi;
2033     current_space->local_remaining     -= nzi;
2034     current_space_lvl->array           += nzi;
2035     current_space_lvl->local_used      += nzi;
2036     current_space_lvl->local_remaining -= nzi;
2037   }
2038 
2039   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2040   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2041 
2042   /* destroy list of free space and other temporary arrays */
2043   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2044   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2045   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2046   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2047   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2048 
2049 #if defined(PETSC_USE_INFO)
2050   {
2051     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2052     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
2053     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2054     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr);
2055     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2056     if (diagonal_fill) {
2057       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr);
2058     }
2059   }
2060 #endif
2061 
2062   /* put together the new matrix */
2063   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2064   ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr);
2065   b    = (Mat_SeqAIJ*)(fact)->data;
2066 
2067   b->free_a       = PETSC_TRUE;
2068   b->free_ij      = PETSC_TRUE;
2069   b->singlemalloc = PETSC_FALSE;
2070 
2071   ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr);
2072   b->j = bj;
2073   b->i = bi;
2074   for (i=0; i<n; i++) bdiag[i] += bi[i];
2075   b->diag = bdiag;
2076   b->ilen = 0;
2077   b->imax = 0;
2078   b->row  = isrow;
2079   b->col  = iscol;
2080   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2081   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2082   b->icol = isicol;
2083   ierr    = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2084   /* In b structure:  Free imax, ilen, old a, old j.
2085      Allocate bdiag, solve_work, new a, new j */
2086   ierr     = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2087   b->maxnz = b->nz = bi[n];
2088 
2089   (fact)->info.factor_mallocs    = reallocs;
2090   (fact)->info.fill_ratio_given  = f;
2091   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2092   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2093   if (a->inode.size) {
2094     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2095   }
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2101 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2102 {
2103   Mat            C = B;
2104   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2105   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2106   IS             ip=b->row,iip = b->icol;
2107   PetscErrorCode ierr;
2108   const PetscInt *rip,*riip;
2109   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2110   PetscInt       *ai=a->i,*aj=a->j;
2111   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2112   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2113   PetscBool      perm_identity;
2114   FactorShiftCtx sctx;
2115   PetscReal      rs;
2116   MatScalar      d,*v;
2117 
2118   PetscFunctionBegin;
2119   /* MatPivotSetUp(): initialize shift context sctx */
2120   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2121 
2122   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2123     sctx.shift_top = info->zeropivot;
2124     for (i=0; i<mbs; i++) {
2125       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2126       d  = (aa)[a->diag[i]];
2127       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2128       v  = aa+ai[i];
2129       nz = ai[i+1] - ai[i];
2130       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2131       if (rs>sctx.shift_top) sctx.shift_top = rs;
2132     }
2133     sctx.shift_top *= 1.1;
2134     sctx.nshift_max = 5;
2135     sctx.shift_lo   = 0.;
2136     sctx.shift_hi   = 1.;
2137   }
2138 
2139   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2140   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2141 
2142   /* allocate working arrays
2143      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2144      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2145   */
2146   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr);
2147 
2148   do {
2149     sctx.newshift = PETSC_FALSE;
2150 
2151     for (i=0; i<mbs; i++) c2r[i] = mbs;
2152     if (mbs) il[0] = 0;
2153 
2154     for (k = 0; k<mbs; k++) {
2155       /* zero rtmp */
2156       nz    = bi[k+1] - bi[k];
2157       bjtmp = bj + bi[k];
2158       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2159 
2160       /* load in initial unfactored row */
2161       bval = ba + bi[k];
2162       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2163       for (j = jmin; j < jmax; j++) {
2164         col = riip[aj[j]];
2165         if (col >= k) { /* only take upper triangular entry */
2166           rtmp[col] = aa[j];
2167           *bval++   = 0.0; /* for in-place factorization */
2168         }
2169       }
2170       /* shift the diagonal of the matrix: ZeropivotApply() */
2171       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2172 
2173       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2174       dk = rtmp[k];
2175       i  = c2r[k]; /* first row to be added to k_th row  */
2176 
2177       while (i < k) {
2178         nexti = c2r[i]; /* next row to be added to k_th row */
2179 
2180         /* compute multiplier, update diag(k) and U(i,k) */
2181         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2182         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2183         dk     += uikdi*ba[ili]; /* update diag[k] */
2184         ba[ili] = uikdi; /* -U(i,k) */
2185 
2186         /* add multiple of row i to k-th row */
2187         jmin = ili + 1; jmax = bi[i+1];
2188         if (jmin < jmax) {
2189           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2190           /* update il and c2r for row i */
2191           il[i] = jmin;
2192           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2193         }
2194         i = nexti;
2195       }
2196 
2197       /* copy data into U(k,:) */
2198       rs   = 0.0;
2199       jmin = bi[k]; jmax = bi[k+1]-1;
2200       if (jmin < jmax) {
2201         for (j=jmin; j<jmax; j++) {
2202           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2203         }
2204         /* add the k-th row into il and c2r */
2205         il[k] = jmin;
2206         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2207       }
2208 
2209       /* MatPivotCheck() */
2210       sctx.rs = rs;
2211       sctx.pv = dk;
2212       ierr    = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr);
2213       if (sctx.newshift) break;
2214       dk = sctx.pv;
2215 
2216       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2217     }
2218   } while (sctx.newshift);
2219 
2220   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2221   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2222   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2223 
2224   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2225   if (perm_identity) {
2226     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2227     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2228     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2229     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2230   } else {
2231     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2232     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2233     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2234     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2235   }
2236 
2237   C->assembled    = PETSC_TRUE;
2238   C->preallocated = PETSC_TRUE;
2239 
2240   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2241 
2242   /* MatPivotView() */
2243   if (sctx.nshift) {
2244     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2245       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr);
2246     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2247       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2248     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2249       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr);
2250     }
2251   }
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2257 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2258 {
2259   Mat            C = B;
2260   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2261   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2262   IS             ip=b->row,iip = b->icol;
2263   PetscErrorCode ierr;
2264   const PetscInt *rip,*riip;
2265   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2266   PetscInt       *ai=a->i,*aj=a->j;
2267   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2268   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2269   PetscBool      perm_identity;
2270   FactorShiftCtx sctx;
2271   PetscReal      rs;
2272   MatScalar      d,*v;
2273 
2274   PetscFunctionBegin;
2275   /* MatPivotSetUp(): initialize shift context sctx */
2276   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2277 
2278   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2279     sctx.shift_top = info->zeropivot;
2280     for (i=0; i<mbs; i++) {
2281       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2282       d  = (aa)[a->diag[i]];
2283       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2284       v  = aa+ai[i];
2285       nz = ai[i+1] - ai[i];
2286       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2287       if (rs>sctx.shift_top) sctx.shift_top = rs;
2288     }
2289     sctx.shift_top *= 1.1;
2290     sctx.nshift_max = 5;
2291     sctx.shift_lo   = 0.;
2292     sctx.shift_hi   = 1.;
2293   }
2294 
2295   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2296   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2297 
2298   /* initialization */
2299   ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr);
2300 
2301   do {
2302     sctx.newshift = PETSC_FALSE;
2303 
2304     for (i=0; i<mbs; i++) jl[i] = mbs;
2305     il[0] = 0;
2306 
2307     for (k = 0; k<mbs; k++) {
2308       /* zero rtmp */
2309       nz    = bi[k+1] - bi[k];
2310       bjtmp = bj + bi[k];
2311       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2312 
2313       bval = ba + bi[k];
2314       /* initialize k-th row by the perm[k]-th row of A */
2315       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2316       for (j = jmin; j < jmax; j++) {
2317         col = riip[aj[j]];
2318         if (col >= k) { /* only take upper triangular entry */
2319           rtmp[col] = aa[j];
2320           *bval++   = 0.0; /* for in-place factorization */
2321         }
2322       }
2323       /* shift the diagonal of the matrix */
2324       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2325 
2326       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2327       dk = rtmp[k];
2328       i  = jl[k]; /* first row to be added to k_th row  */
2329 
2330       while (i < k) {
2331         nexti = jl[i]; /* next row to be added to k_th row */
2332 
2333         /* compute multiplier, update diag(k) and U(i,k) */
2334         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2335         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2336         dk     += uikdi*ba[ili];
2337         ba[ili] = uikdi; /* -U(i,k) */
2338 
2339         /* add multiple of row i to k-th row */
2340         jmin = ili + 1; jmax = bi[i+1];
2341         if (jmin < jmax) {
2342           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2343           /* update il and jl for row i */
2344           il[i] = jmin;
2345           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2346         }
2347         i = nexti;
2348       }
2349 
2350       /* shift the diagonals when zero pivot is detected */
2351       /* compute rs=sum of abs(off-diagonal) */
2352       rs   = 0.0;
2353       jmin = bi[k]+1;
2354       nz   = bi[k+1] - jmin;
2355       bcol = bj + jmin;
2356       for (j=0; j<nz; j++) {
2357         rs += PetscAbsScalar(rtmp[bcol[j]]);
2358       }
2359 
2360       sctx.rs = rs;
2361       sctx.pv = dk;
2362       ierr    = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr);
2363       if (sctx.newshift) break;
2364       dk = sctx.pv;
2365 
2366       /* copy data into U(k,:) */
2367       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2368       jmin      = bi[k]+1; jmax = bi[k+1];
2369       if (jmin < jmax) {
2370         for (j=jmin; j<jmax; j++) {
2371           col = bj[j]; ba[j] = rtmp[col];
2372         }
2373         /* add the k-th row into il and jl */
2374         il[k] = jmin;
2375         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2376       }
2377     }
2378   } while (sctx.newshift);
2379 
2380   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2381   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2382   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2383 
2384   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2385   if (perm_identity) {
2386     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2387     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2388     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2389     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2390   } else {
2391     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2392     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2393     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2394     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2395   }
2396 
2397   C->assembled    = PETSC_TRUE;
2398   C->preallocated = PETSC_TRUE;
2399 
2400   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2401   if (sctx.nshift) {
2402     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2403       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2404     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2405       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
2406     }
2407   }
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 /*
2412    icc() under revised new data structure.
2413    Factored arrays bj and ba are stored as
2414      U(0,:),...,U(i,:),U(n-1,:)
2415 
2416    ui=fact->i is an array of size n+1, in which
2417    ui+
2418      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2419      ui[n]:  points to U(n-1,n-1)+1
2420 
2421   udiag=fact->diag is an array of size n,in which
2422      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2423 
2424    U(i,:) contains udiag[i] as its last entry, i.e.,
2425     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2426 */
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2430 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2431 {
2432   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2433   Mat_SeqSBAIJ       *b;
2434   PetscErrorCode     ierr;
2435   PetscBool          perm_identity,missing;
2436   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2437   const PetscInt     *rip,*riip;
2438   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2439   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2440   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2441   PetscReal          fill          =info->fill,levels=info->levels;
2442   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2443   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2444   PetscBT            lnkbt;
2445   IS                 iperm;
2446 
2447   PetscFunctionBegin;
2448   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2449   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2450   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2451   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2452   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2453 
2454   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2455   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2456   ui[0] = 0;
2457 
2458   /* ICC(0) without matrix ordering: simply rearrange column indices */
2459   if (!levels && perm_identity) {
2460     for (i=0; i<am; i++) {
2461       ncols    = ai[i+1] - a->diag[i];
2462       ui[i+1]  = ui[i] + ncols;
2463       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2464     }
2465     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2466     cols = uj;
2467     for (i=0; i<am; i++) {
2468       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2469       ncols = ai[i+1] - a->diag[i] -1;
2470       for (j=0; j<ncols; j++) *cols++ = aj[j];
2471       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2472     }
2473   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2474     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2475     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2476 
2477     /* initialization */
2478     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2479 
2480     /* jl: linked list for storing indices of the pivot rows
2481        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2482     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2483     for (i=0; i<am; i++) {
2484       jl[i] = am; il[i] = 0;
2485     }
2486 
2487     /* create and initialize a linked list for storing column indices of the active row k */
2488     nlnk = am + 1;
2489     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2490 
2491     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2492     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2493     current_space     = free_space;
2494     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2495     current_space_lvl = free_space_lvl;
2496 
2497     for (k=0; k<am; k++) {  /* for each active row k */
2498       /* initialize lnk by the column indices of row rip[k] of A */
2499       nzk   = 0;
2500       ncols = ai[rip[k]+1] - ai[rip[k]];
2501       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2502       ncols_upper = 0;
2503       for (j=0; j<ncols; j++) {
2504         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2505         if (riip[i] >= k) { /* only take upper triangular entry */
2506           ajtmp[ncols_upper] = i;
2507           ncols_upper++;
2508         }
2509       }
2510       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2511       nzk += nlnk;
2512 
2513       /* update lnk by computing fill-in for each pivot row to be merged in */
2514       prow = jl[k]; /* 1st pivot row */
2515 
2516       while (prow < k) {
2517         nextprow = jl[prow];
2518 
2519         /* merge prow into k-th row */
2520         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2521         jmax  = ui[prow+1];
2522         ncols = jmax-jmin;
2523         i     = jmin - ui[prow];
2524         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2525         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2526         j     = *(uj - 1);
2527         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2528         nzk  += nlnk;
2529 
2530         /* update il and jl for prow */
2531         if (jmin < jmax) {
2532           il[prow] = jmin;
2533           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2534         }
2535         prow = nextprow;
2536       }
2537 
2538       /* if free space is not available, make more free space */
2539       if (current_space->local_remaining<nzk) {
2540         i    = am - k + 1; /* num of unfactored rows */
2541         i   *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2542         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2543         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2544         reallocs++;
2545       }
2546 
2547       /* copy data into free_space and free_space_lvl, then initialize lnk */
2548       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2549       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2550 
2551       /* add the k-th row into il and jl */
2552       if (nzk > 1) {
2553         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2554         jl[k] = jl[i]; jl[i] = k;
2555         il[k] = ui[k] + 1;
2556       }
2557       uj_ptr[k]     = current_space->array;
2558       uj_lvl_ptr[k] = current_space_lvl->array;
2559 
2560       current_space->array           += nzk;
2561       current_space->local_used      += nzk;
2562       current_space->local_remaining -= nzk;
2563 
2564       current_space_lvl->array           += nzk;
2565       current_space_lvl->local_used      += nzk;
2566       current_space_lvl->local_remaining -= nzk;
2567 
2568       ui[k+1] = ui[k] + nzk;
2569     }
2570 
2571     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2572     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2573     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2574     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2575 
2576     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2577     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2578     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2579     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2580     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2581 
2582   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2583 
2584   /* put together the new matrix in MATSEQSBAIJ format */
2585   b               = (Mat_SeqSBAIJ*)(fact)->data;
2586   b->singlemalloc = PETSC_FALSE;
2587 
2588   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2589 
2590   b->j             = uj;
2591   b->i             = ui;
2592   b->diag          = udiag;
2593   b->free_diag     = PETSC_TRUE;
2594   b->ilen          = 0;
2595   b->imax          = 0;
2596   b->row           = perm;
2597   b->col           = perm;
2598   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2599   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2600   b->icol          = iperm;
2601   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2602 
2603   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2604   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2605 
2606   b->maxnz   = b->nz = ui[am];
2607   b->free_a  = PETSC_TRUE;
2608   b->free_ij = PETSC_TRUE;
2609 
2610   fact->info.factor_mallocs   = reallocs;
2611   fact->info.fill_ratio_given = fill;
2612   if (ai[am] != 0) {
2613     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2614     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2615   } else {
2616     fact->info.fill_ratio_needed = 0.0;
2617   }
2618 #if defined(PETSC_USE_INFO)
2619   if (ai[am] != 0) {
2620     PetscReal af = fact->info.fill_ratio_needed;
2621     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2622     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2623     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2624   } else {
2625     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2626   }
2627 #endif
2628   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 #undef __FUNCT__
2633 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2634 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2635 {
2636   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2637   Mat_SeqSBAIJ       *b;
2638   PetscErrorCode     ierr;
2639   PetscBool          perm_identity,missing;
2640   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2641   const PetscInt     *rip,*riip;
2642   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2643   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2644   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2645   PetscReal          fill          =info->fill,levels=info->levels;
2646   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2647   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2648   PetscBT            lnkbt;
2649   IS                 iperm;
2650 
2651   PetscFunctionBegin;
2652   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2653   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2654   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2655   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2656   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2657 
2658   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2659   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2660   ui[0] = 0;
2661 
2662   /* ICC(0) without matrix ordering: simply copies fill pattern */
2663   if (!levels && perm_identity) {
2664 
2665     for (i=0; i<am; i++) {
2666       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2667       udiag[i] = ui[i];
2668     }
2669     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2670     cols = uj;
2671     for (i=0; i<am; i++) {
2672       aj    = a->j + a->diag[i];
2673       ncols = ui[i+1] - ui[i];
2674       for (j=0; j<ncols; j++) *cols++ = *aj++;
2675     }
2676   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2677     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2678     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2679 
2680     /* initialization */
2681     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr);
2682 
2683     /* jl: linked list for storing indices of the pivot rows
2684        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2685     ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr);
2686     for (i=0; i<am; i++) {
2687       jl[i] = am; il[i] = 0;
2688     }
2689 
2690     /* create and initialize a linked list for storing column indices of the active row k */
2691     nlnk = am + 1;
2692     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2693 
2694     /* initial FreeSpace size is fill*(ai[am]+1) */
2695     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2696     current_space     = free_space;
2697     ierr              = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2698     current_space_lvl = free_space_lvl;
2699 
2700     for (k=0; k<am; k++) {  /* for each active row k */
2701       /* initialize lnk by the column indices of row rip[k] of A */
2702       nzk   = 0;
2703       ncols = ai[rip[k]+1] - ai[rip[k]];
2704       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2705       ncols_upper = 0;
2706       for (j=0; j<ncols; j++) {
2707         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2708         if (riip[i] >= k) { /* only take upper triangular entry */
2709           ajtmp[ncols_upper] = i;
2710           ncols_upper++;
2711         }
2712       }
2713       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2714       nzk += nlnk;
2715 
2716       /* update lnk by computing fill-in for each pivot row to be merged in */
2717       prow = jl[k]; /* 1st pivot row */
2718 
2719       while (prow < k) {
2720         nextprow = jl[prow];
2721 
2722         /* merge prow into k-th row */
2723         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2724         jmax  = ui[prow+1];
2725         ncols = jmax-jmin;
2726         i     = jmin - ui[prow];
2727         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2728         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2729         j     = *(uj - 1);
2730         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2731         nzk  += nlnk;
2732 
2733         /* update il and jl for prow */
2734         if (jmin < jmax) {
2735           il[prow] = jmin;
2736           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2737         }
2738         prow = nextprow;
2739       }
2740 
2741       /* if free space is not available, make more free space */
2742       if (current_space->local_remaining<nzk) {
2743         i    = am - k + 1; /* num of unfactored rows */
2744         i   *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2745         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2746         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2747         reallocs++;
2748       }
2749 
2750       /* copy data into free_space and free_space_lvl, then initialize lnk */
2751       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2752       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2753 
2754       /* add the k-th row into il and jl */
2755       if (nzk > 1) {
2756         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2757         jl[k] = jl[i]; jl[i] = k;
2758         il[k] = ui[k] + 1;
2759       }
2760       uj_ptr[k]     = current_space->array;
2761       uj_lvl_ptr[k] = current_space_lvl->array;
2762 
2763       current_space->array           += nzk;
2764       current_space->local_used      += nzk;
2765       current_space->local_remaining -= nzk;
2766 
2767       current_space_lvl->array           += nzk;
2768       current_space_lvl->local_used      += nzk;
2769       current_space_lvl->local_remaining -= nzk;
2770 
2771       ui[k+1] = ui[k] + nzk;
2772     }
2773 
2774 #if defined(PETSC_USE_INFO)
2775     if (ai[am] != 0) {
2776       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2777       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2778       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2779       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2780     } else {
2781       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2782     }
2783 #endif
2784 
2785     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2786     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2787     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2788     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2789 
2790     /* destroy list of free space and other temporary array(s) */
2791     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2792     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2793     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2794     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2795 
2796   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2797 
2798   /* put together the new matrix in MATSEQSBAIJ format */
2799 
2800   b               = (Mat_SeqSBAIJ*)fact->data;
2801   b->singlemalloc = PETSC_FALSE;
2802 
2803   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2804 
2805   b->j         = uj;
2806   b->i         = ui;
2807   b->diag      = udiag;
2808   b->free_diag = PETSC_TRUE;
2809   b->ilen      = 0;
2810   b->imax      = 0;
2811   b->row       = perm;
2812   b->col       = perm;
2813 
2814   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2815   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2816 
2817   b->icol          = iperm;
2818   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2819   ierr             = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2820   ierr             = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2821   b->maxnz         = b->nz = ui[am];
2822   b->free_a        = PETSC_TRUE;
2823   b->free_ij       = PETSC_TRUE;
2824 
2825   fact->info.factor_mallocs   = reallocs;
2826   fact->info.fill_ratio_given = fill;
2827   if (ai[am] != 0) {
2828     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2829   } else {
2830     fact->info.fill_ratio_needed = 0.0;
2831   }
2832   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2838 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2839 {
2840   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2841   Mat_SeqSBAIJ       *b;
2842   PetscErrorCode     ierr;
2843   PetscBool          perm_identity;
2844   PetscReal          fill = info->fill;
2845   const PetscInt     *rip,*riip;
2846   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2847   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2848   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2849   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2850   PetscBT            lnkbt;
2851   IS                 iperm;
2852 
2853   PetscFunctionBegin;
2854   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2855   /* check whether perm is the identity mapping */
2856   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2857   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2858   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2859   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2860 
2861   /* initialization */
2862   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2863   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr);
2864   ui[0] = 0;
2865 
2866   /* jl: linked list for storing indices of the pivot rows
2867      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2868   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
2869   for (i=0; i<am; i++) {
2870     jl[i] = am; il[i] = 0;
2871   }
2872 
2873   /* create and initialize a linked list for storing column indices of the active row k */
2874   nlnk = am + 1;
2875   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2876 
2877   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2878   ierr          = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2879   current_space = free_space;
2880 
2881   for (k=0; k<am; k++) {  /* for each active row k */
2882     /* initialize lnk by the column indices of row rip[k] of A */
2883     nzk   = 0;
2884     ncols = ai[rip[k]+1] - ai[rip[k]];
2885     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2886     ncols_upper = 0;
2887     for (j=0; j<ncols; j++) {
2888       i = riip[*(aj + ai[rip[k]] + j)];
2889       if (i >= k) { /* only take upper triangular entry */
2890         cols[ncols_upper] = i;
2891         ncols_upper++;
2892       }
2893     }
2894     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2895     nzk += nlnk;
2896 
2897     /* update lnk by computing fill-in for each pivot row to be merged in */
2898     prow = jl[k]; /* 1st pivot row */
2899 
2900     while (prow < k) {
2901       nextprow = jl[prow];
2902       /* merge prow into k-th row */
2903       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2904       jmax   = ui[prow+1];
2905       ncols  = jmax-jmin;
2906       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2907       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2908       nzk   += nlnk;
2909 
2910       /* update il and jl for prow */
2911       if (jmin < jmax) {
2912         il[prow] = jmin;
2913         j        = *uj_ptr;
2914         jl[prow] = jl[j];
2915         jl[j]    = prow;
2916       }
2917       prow = nextprow;
2918     }
2919 
2920     /* if free space is not available, make more free space */
2921     if (current_space->local_remaining<nzk) {
2922       i    = am - k + 1; /* num of unfactored rows */
2923       i   *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2924       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2925       reallocs++;
2926     }
2927 
2928     /* copy data into free space, then initialize lnk */
2929     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2930 
2931     /* add the k-th row into il and jl */
2932     if (nzk > 1) {
2933       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2934       jl[k] = jl[i]; jl[i] = k;
2935       il[k] = ui[k] + 1;
2936     }
2937     ui_ptr[k] = current_space->array;
2938 
2939     current_space->array           += nzk;
2940     current_space->local_used      += nzk;
2941     current_space->local_remaining -= nzk;
2942 
2943     ui[k+1] = ui[k] + nzk;
2944   }
2945 
2946   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2947   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2948   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2949 
2950   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2951   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2952   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2953   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2954 
2955   /* put together the new matrix in MATSEQSBAIJ format */
2956 
2957   b               = (Mat_SeqSBAIJ*)fact->data;
2958   b->singlemalloc = PETSC_FALSE;
2959   b->free_a       = PETSC_TRUE;
2960   b->free_ij      = PETSC_TRUE;
2961 
2962   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
2963 
2964   b->j         = uj;
2965   b->i         = ui;
2966   b->diag      = udiag;
2967   b->free_diag = PETSC_TRUE;
2968   b->ilen      = 0;
2969   b->imax      = 0;
2970   b->row       = perm;
2971   b->col       = perm;
2972 
2973   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2974   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2975 
2976   b->icol          = iperm;
2977   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2978 
2979   ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2980   ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2981 
2982   b->maxnz = b->nz = ui[am];
2983 
2984   fact->info.factor_mallocs   = reallocs;
2985   fact->info.fill_ratio_given = fill;
2986   if (ai[am] != 0) {
2987     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2988     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2989   } else {
2990     fact->info.fill_ratio_needed = 0.0;
2991   }
2992 #if defined(PETSC_USE_INFO)
2993   if (ai[am] != 0) {
2994     PetscReal af = fact->info.fill_ratio_needed;
2995     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2996     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2997     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2998   } else {
2999     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3000   }
3001 #endif
3002   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 #undef __FUNCT__
3007 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
3008 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
3009 {
3010   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
3011   Mat_SeqSBAIJ       *b;
3012   PetscErrorCode     ierr;
3013   PetscBool          perm_identity;
3014   PetscReal          fill = info->fill;
3015   const PetscInt     *rip,*riip;
3016   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3017   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3018   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3019   PetscFreeSpaceList free_space=NULL,current_space=NULL;
3020   PetscBT            lnkbt;
3021   IS                 iperm;
3022 
3023   PetscFunctionBegin;
3024   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3025   /* check whether perm is the identity mapping */
3026   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
3027   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
3028   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
3029   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
3030 
3031   /* initialization */
3032   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
3033   ui[0] = 0;
3034 
3035   /* jl: linked list for storing indices of the pivot rows
3036      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3037   ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr);
3038   for (i=0; i<am; i++) {
3039     jl[i] = am; il[i] = 0;
3040   }
3041 
3042   /* create and initialize a linked list for storing column indices of the active row k */
3043   nlnk = am + 1;
3044   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3045 
3046   /* initial FreeSpace size is fill*(ai[am]+1) */
3047   ierr          = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
3048   current_space = free_space;
3049 
3050   for (k=0; k<am; k++) {  /* for each active row k */
3051     /* initialize lnk by the column indices of row rip[k] of A */
3052     nzk   = 0;
3053     ncols = ai[rip[k]+1] - ai[rip[k]];
3054     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3055     ncols_upper = 0;
3056     for (j=0; j<ncols; j++) {
3057       i = riip[*(aj + ai[rip[k]] + j)];
3058       if (i >= k) { /* only take upper triangular entry */
3059         cols[ncols_upper] = i;
3060         ncols_upper++;
3061       }
3062     }
3063     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3064     nzk += nlnk;
3065 
3066     /* update lnk by computing fill-in for each pivot row to be merged in */
3067     prow = jl[k]; /* 1st pivot row */
3068 
3069     while (prow < k) {
3070       nextprow = jl[prow];
3071       /* merge prow into k-th row */
3072       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3073       jmax   = ui[prow+1];
3074       ncols  = jmax-jmin;
3075       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3076       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3077       nzk   += nlnk;
3078 
3079       /* update il and jl for prow */
3080       if (jmin < jmax) {
3081         il[prow] = jmin;
3082         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3083       }
3084       prow = nextprow;
3085     }
3086 
3087     /* if free space is not available, make more free space */
3088     if (current_space->local_remaining<nzk) {
3089       i    = am - k + 1; /* num of unfactored rows */
3090       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3091       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3092       reallocs++;
3093     }
3094 
3095     /* copy data into free space, then initialize lnk */
3096     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3097 
3098     /* add the k-th row into il and jl */
3099     if (nzk-1 > 0) {
3100       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3101       jl[k] = jl[i]; jl[i] = k;
3102       il[k] = ui[k] + 1;
3103     }
3104     ui_ptr[k] = current_space->array;
3105 
3106     current_space->array           += nzk;
3107     current_space->local_used      += nzk;
3108     current_space->local_remaining -= nzk;
3109 
3110     ui[k+1] = ui[k] + nzk;
3111   }
3112 
3113 #if defined(PETSC_USE_INFO)
3114   if (ai[am] != 0) {
3115     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3116     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
3117     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
3118     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
3119   } else {
3120     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3121   }
3122 #endif
3123 
3124   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3125   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3126   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3127 
3128   /* destroy list of free space and other temporary array(s) */
3129   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
3130   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3131   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3132 
3133   /* put together the new matrix in MATSEQSBAIJ format */
3134 
3135   b               = (Mat_SeqSBAIJ*)fact->data;
3136   b->singlemalloc = PETSC_FALSE;
3137   b->free_a       = PETSC_TRUE;
3138   b->free_ij      = PETSC_TRUE;
3139 
3140   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
3141 
3142   b->j    = uj;
3143   b->i    = ui;
3144   b->diag = 0;
3145   b->ilen = 0;
3146   b->imax = 0;
3147   b->row  = perm;
3148   b->col  = perm;
3149 
3150   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3151   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3152 
3153   b->icol          = iperm;
3154   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3155 
3156   ierr     = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3157   ierr     = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3158   b->maxnz = b->nz = ui[am];
3159 
3160   fact->info.factor_mallocs   = reallocs;
3161   fact->info.fill_ratio_given = fill;
3162   if (ai[am] != 0) {
3163     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3164   } else {
3165     fact->info.fill_ratio_needed = 0.0;
3166   }
3167   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 #undef __FUNCT__
3172 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3173 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3174 {
3175   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3176   PetscErrorCode    ierr;
3177   PetscInt          n   = A->rmap->n;
3178   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3179   PetscScalar       *x,sum;
3180   const PetscScalar *b;
3181   const MatScalar   *aa = a->a,*v;
3182   PetscInt          i,nz;
3183 
3184   PetscFunctionBegin;
3185   if (!n) PetscFunctionReturn(0);
3186 
3187   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3188   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3189 
3190   /* forward solve the lower triangular */
3191   x[0] = b[0];
3192   v    = aa;
3193   vi   = aj;
3194   for (i=1; i<n; i++) {
3195     nz  = ai[i+1] - ai[i];
3196     sum = b[i];
3197     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3198     v   += nz;
3199     vi  += nz;
3200     x[i] = sum;
3201   }
3202 
3203   /* backward solve the upper triangular */
3204   for (i=n-1; i>=0; i--) {
3205     v   = aa + adiag[i+1] + 1;
3206     vi  = aj + adiag[i+1] + 1;
3207     nz  = adiag[i] - adiag[i+1]-1;
3208     sum = x[i];
3209     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3210     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3211   }
3212 
3213   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3215   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatSolve_SeqAIJ"
3221 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3222 {
3223   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3224   IS                iscol = a->col,isrow = a->row;
3225   PetscErrorCode    ierr;
3226   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3227   const PetscInt    *rout,*cout,*r,*c;
3228   PetscScalar       *x,*tmp,sum;
3229   const PetscScalar *b;
3230   const MatScalar   *aa = a->a,*v;
3231 
3232   PetscFunctionBegin;
3233   if (!n) PetscFunctionReturn(0);
3234 
3235   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3236   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3237   tmp  = a->solve_work;
3238 
3239   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3240   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3241 
3242   /* forward solve the lower triangular */
3243   tmp[0] = b[r[0]];
3244   v      = aa;
3245   vi     = aj;
3246   for (i=1; i<n; i++) {
3247     nz  = ai[i+1] - ai[i];
3248     sum = b[r[i]];
3249     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3250     tmp[i] = sum;
3251     v     += nz; vi += nz;
3252   }
3253 
3254   /* backward solve the upper triangular */
3255   for (i=n-1; i>=0; i--) {
3256     v   = aa + adiag[i+1]+1;
3257     vi  = aj + adiag[i+1]+1;
3258     nz  = adiag[i]-adiag[i+1]-1;
3259     sum = tmp[i];
3260     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3261     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3262   }
3263 
3264   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3265   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3266   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3267   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3268   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3269   PetscFunctionReturn(0);
3270 }
3271 
3272 #undef __FUNCT__
3273 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3274 /*
3275     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3276 */
3277 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3278 {
3279   Mat            B = *fact;
3280   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3281   IS             isicol;
3282   PetscErrorCode ierr;
3283   const PetscInt *r,*ic;
3284   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3285   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3286   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3287   PetscInt       nlnk,*lnk;
3288   PetscBT        lnkbt;
3289   PetscBool      row_identity,icol_identity;
3290   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3291   const PetscInt *ics;
3292   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3293   PetscReal      dt     =info->dt,shift=info->shiftamount;
3294   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3295   PetscBool      missing;
3296 
3297   PetscFunctionBegin;
3298   if (dt      == PETSC_DEFAULT) dt = 0.005;
3299   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3300 
3301   /* ------- symbolic factorization, can be reused ---------*/
3302   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3303   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3304   adiag=a->diag;
3305 
3306   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3307 
3308   /* bdiag is location of diagonal in factor */
3309   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3310   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */
3311 
3312   /* allocate row pointers bi */
3313   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3314 
3315   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3316   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3317   nnz_max = ai[n]+2*n*dtcount+2;
3318 
3319   ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3320   ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3321 
3322   /* put together the new matrix */
3323   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3324   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
3325   b    = (Mat_SeqAIJ*)B->data;
3326 
3327   b->free_a       = PETSC_TRUE;
3328   b->free_ij      = PETSC_TRUE;
3329   b->singlemalloc = PETSC_FALSE;
3330 
3331   b->a    = ba;
3332   b->j    = bj;
3333   b->i    = bi;
3334   b->diag = bdiag;
3335   b->ilen = 0;
3336   b->imax = 0;
3337   b->row  = isrow;
3338   b->col  = iscol;
3339   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3340   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3341   b->icol = isicol;
3342 
3343   ierr     = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
3344   ierr     = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3345   b->maxnz = nnz_max;
3346 
3347   B->factortype            = MAT_FACTOR_ILUDT;
3348   B->info.factor_mallocs   = 0;
3349   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3350   /* ------- end of symbolic factorization ---------*/
3351 
3352   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3353   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3354   ics  = ic;
3355 
3356   /* linked list for storing column indices of the active row */
3357   nlnk = n + 1;
3358   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3359 
3360   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3361   ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr);
3362   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3363   ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr);
3364   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3365 
3366   bi[0]        = 0;
3367   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3368   bdiag_rev[n] = bdiag[0];
3369   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3370   for (i=0; i<n; i++) {
3371     /* copy initial fill into linked list */
3372     nzi = ai[r[i]+1] - ai[r[i]];
3373     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3374     nzi_al = adiag[r[i]] - ai[r[i]];
3375     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3376     ajtmp  = aj + ai[r[i]];
3377     ierr   = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3378 
3379     /* load in initial (unfactored row) */
3380     aatmp = a->a + ai[r[i]];
3381     for (j=0; j<nzi; j++) {
3382       rtmp[ics[*ajtmp++]] = *aatmp++;
3383     }
3384 
3385     /* add pivot rows into linked list */
3386     row = lnk[n];
3387     while (row < i) {
3388       nzi_bl = bi[row+1] - bi[row] + 1;
3389       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3390       ierr   = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3391       nzi   += nlnk;
3392       row    = lnk[row];
3393     }
3394 
3395     /* copy data from lnk into jtmp, then initialize lnk */
3396     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3397 
3398     /* numerical factorization */
3399     bjtmp = jtmp;
3400     row   = *bjtmp++; /* 1st pivot row */
3401     while (row < i) {
3402       pc         = rtmp + row;
3403       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3404       multiplier = (*pc) * (*pv);
3405       *pc        = multiplier;
3406       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3407         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3408         pv = ba + bdiag[row+1] + 1;
3409         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3410         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3411         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3412         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3413       }
3414       row = *bjtmp++;
3415     }
3416 
3417     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3418     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3419     nzi_bl   = 0; j = 0;
3420     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3421       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3422       nzi_bl++; j++;
3423     }
3424     nzi_bu = nzi - nzi_bl -1;
3425     while (j < nzi) {
3426       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3427       j++;
3428     }
3429 
3430     bjtmp = bj + bi[i];
3431     batmp = ba + bi[i];
3432     /* apply level dropping rule to L part */
3433     ncut = nzi_al + dtcount;
3434     if (ncut < nzi_bl) {
3435       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3436       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3437     } else {
3438       ncut = nzi_bl;
3439     }
3440     for (j=0; j<ncut; j++) {
3441       bjtmp[j] = jtmp[j];
3442       batmp[j] = vtmp[j];
3443       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3444     }
3445     bi[i+1] = bi[i] + ncut;
3446     nzi     = ncut + 1;
3447 
3448     /* apply level dropping rule to U part */
3449     ncut = nzi_au + dtcount;
3450     if (ncut < nzi_bu) {
3451       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3452       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3453     } else {
3454       ncut = nzi_bu;
3455     }
3456     nzi += ncut;
3457 
3458     /* mark bdiagonal */
3459     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3460     bdiag_rev[n-i-1] = bdiag[i+1];
3461     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3462     bjtmp            = bj + bdiag[i];
3463     batmp            = ba + bdiag[i];
3464     *bjtmp           = i;
3465     *batmp           = diag_tmp; /* rtmp[i]; */
3466     if (*batmp == 0.0) {
3467       *batmp = dt+shift;
3468       /* printf(" row %d add shift %g\n",i,shift); */
3469     }
3470     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3471     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3472 
3473     bjtmp = bj + bdiag[i+1]+1;
3474     batmp = ba + bdiag[i+1]+1;
3475     for (k=0; k<ncut; k++) {
3476       bjtmp[k] = jtmp[nzi_bl+1+k];
3477       batmp[k] = vtmp[nzi_bl+1+k];
3478       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3479     }
3480     /* printf("\n"); */
3481 
3482     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3483     /*
3484     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3485     printf(" ----------------------------\n");
3486     */
3487   } /* for (i=0; i<n; i++) */
3488     /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3489   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3490 
3491   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3492   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3493 
3494   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3495   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3496   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3497   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3498 
3499   ierr     = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3500   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3501 
3502   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3503   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3504   if (row_identity && icol_identity) {
3505     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3506   } else {
3507     B->ops->solve = MatSolve_SeqAIJ;
3508   }
3509 
3510   B->ops->solveadd          = 0;
3511   B->ops->solvetranspose    = 0;
3512   B->ops->solvetransposeadd = 0;
3513   B->ops->matsolve          = 0;
3514   B->assembled              = PETSC_TRUE;
3515   B->preallocated           = PETSC_TRUE;
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 /* a wraper of MatILUDTFactor_SeqAIJ() */
3520 #undef __FUNCT__
3521 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3522 /*
3523     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3524 */
3525 
3526 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3527 {
3528   PetscErrorCode ierr;
3529 
3530   PetscFunctionBegin;
3531   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3532   PetscFunctionReturn(0);
3533 }
3534 
3535 /*
3536    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3537    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3538 */
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3541 /*
3542     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3543 */
3544 
3545 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3546 {
3547   Mat            C     =fact;
3548   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3549   IS             isrow = b->row,isicol = b->icol;
3550   PetscErrorCode ierr;
3551   const PetscInt *r,*ic,*ics;
3552   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3553   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3554   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3555   PetscReal      dt=info->dt,shift=info->shiftamount;
3556   PetscBool      row_identity, col_identity;
3557 
3558   PetscFunctionBegin;
3559   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3560   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3561   ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
3562   ics  = ic;
3563 
3564   for (i=0; i<n; i++) {
3565     /* initialize rtmp array */
3566     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3567     bjtmp = bj + bi[i];
3568     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3569     rtmp[i] = 0.0;
3570     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3571     bjtmp   = bj + bdiag[i+1] + 1;
3572     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3573 
3574     /* load in initial unfactored row of A */
3575     /* printf("row %d\n",i); */
3576     nz    = ai[r[i]+1] - ai[r[i]];
3577     ajtmp = aj + ai[r[i]];
3578     v     = aa + ai[r[i]];
3579     for (j=0; j<nz; j++) {
3580       rtmp[ics[*ajtmp++]] = v[j];
3581       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3582     }
3583     /* printf("\n"); */
3584 
3585     /* numerical factorization */
3586     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3587     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3588     k     = 0;
3589     while (k < nzl) {
3590       row = *bjtmp++;
3591       /* printf("  prow %d\n",row); */
3592       pc         = rtmp + row;
3593       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3594       multiplier = (*pc) * (*pv);
3595       *pc        = multiplier;
3596       if (PetscAbsScalar(multiplier) > dt) {
3597         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3598         pv = b->a + bdiag[row+1] + 1;
3599         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3600         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3601         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3602       }
3603       k++;
3604     }
3605 
3606     /* finished row so stick it into b->a */
3607     /* L-part */
3608     pv  = b->a + bi[i];
3609     pj  = bj + bi[i];
3610     nzl = bi[i+1] - bi[i];
3611     for (j=0; j<nzl; j++) {
3612       pv[j] = rtmp[pj[j]];
3613       /* printf(" (%d,%g),",pj[j],pv[j]); */
3614     }
3615 
3616     /* diagonal: invert diagonal entries for simplier triangular solves */
3617     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3618     b->a[bdiag[i]] = 1.0/rtmp[i];
3619     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3620 
3621     /* U-part */
3622     pv  = b->a + bdiag[i+1] + 1;
3623     pj  = bj + bdiag[i+1] + 1;
3624     nzu = bdiag[i] - bdiag[i+1] - 1;
3625     for (j=0; j<nzu; j++) {
3626       pv[j] = rtmp[pj[j]];
3627       /* printf(" (%d,%g),",pj[j],pv[j]); */
3628     }
3629     /* printf("\n"); */
3630   }
3631 
3632   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3633   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3634   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3635 
3636   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3637   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3638   if (row_identity && col_identity) {
3639     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3640   } else {
3641     C->ops->solve = MatSolve_SeqAIJ;
3642   }
3643   C->ops->solveadd          = 0;
3644   C->ops->solvetranspose    = 0;
3645   C->ops->solvetransposeadd = 0;
3646   C->ops->matsolve          = 0;
3647   C->assembled              = PETSC_TRUE;
3648   C->preallocated           = PETSC_TRUE;
3649 
3650   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3651   PetscFunctionReturn(0);
3652 }
3653