xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 4f02bc6a7df4e4b03c783003a8e0899ee35fcb05)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
9 /*
10       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
11 
12       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13 */
14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15 {
16   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
17   PetscErrorCode    ierr;
18   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19   const PetscInt    *ai = a->i, *aj = a->j;
20   const PetscScalar *aa = a->a;
21   PetscBool         *done;
22   PetscReal         best,past = 0,future;
23 
24   PetscFunctionBegin;
25   /* pick initial row */
26   best = -1;
27   for (i=0; i<n; i++) {
28     future = 0.0;
29     for (j=ai[i]; j<ai[i+1]; j++) {
30       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31       else              past  = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best    = past/future;
36       current = i;
37     }
38   }
39 
40   ierr     = PetscMalloc1(n,&done);CHKERRQ(ierr);
41   ierr     = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr     = PetscMalloc1(n,&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best       = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk])       past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best       = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current    = newcurrent;
84     order[i+1] = current;
85   }
86   ierr  = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr  = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr  = PetscFree(done);CHKERRQ(ierr);
90   ierr  = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
96 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
97 {
98   PetscInt       n = A->rmap->n;
99   PetscErrorCode ierr;
100 
101   PetscFunctionBegin;
102 #if defined(PETSC_USE_COMPLEX)
103   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
104 #endif
105   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
106   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
107   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
108     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
109 
110     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
111     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
112 
113     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
114   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
115     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
116     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
117 
118     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
119     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
120   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
121   (*B)->factortype = ftype;
122   PetscFunctionReturn(0);
123 }
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
127 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
128 {
129   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
130   IS                 isicol;
131   PetscErrorCode     ierr;
132   const PetscInt     *r,*ic;
133   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
134   PetscInt           *bi,*bj,*ajtmp;
135   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
136   PetscReal          f;
137   PetscInt           nlnk,*lnk,k,**bi_ptr;
138   PetscFreeSpaceList free_space=NULL,current_space=NULL;
139   PetscBT            lnkbt;
140   PetscBool          missing;
141 
142   PetscFunctionBegin;
143   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
144   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
145   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
146 
147   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
148   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
149   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
150 
151   /* get new row pointers */
152   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
153   bi[0] = 0;
154 
155   /* bdiag is location of diagonal in factor */
156   ierr     = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
157   bdiag[0] = 0;
158 
159   /* linked list for storing column indices of the active row */
160   nlnk = n + 1;
161   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
162 
163   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
164 
165   /* initial FreeSpace size is f*(ai[n]+1) */
166   f             = info->fill;
167   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
168   current_space = free_space;
169 
170   for (i=0; i<n; i++) {
171     /* copy previous fill into linked list */
172     nzi = 0;
173     nnz = ai[r[i]+1] - ai[r[i]];
174     ajtmp = aj + ai[r[i]];
175     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
176     nzi  += nlnk;
177 
178     /* add pivot rows into linked list */
179     row = lnk[n];
180     while (row < i) {
181       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
182       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
183       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
184       nzi  += nlnk;
185       row   = lnk[row];
186     }
187     bi[i+1] = bi[i] + nzi;
188     im[i]   = nzi;
189 
190     /* mark bdiag */
191     nzbd = 0;
192     nnz  = nzi;
193     k    = lnk[n];
194     while (nnz-- && k < i) {
195       nzbd++;
196       k = lnk[k];
197     }
198     bdiag[i] = bi[i] + nzbd;
199 
200     /* if free space is not available, make more free space */
201     if (current_space->local_remaining<nzi) {
202       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
203       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
204       reallocs++;
205     }
206 
207     /* copy data into free space, then initialize lnk */
208     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
209 
210     bi_ptr[i]                       = current_space->array;
211     current_space->array           += nzi;
212     current_space->local_used      += nzi;
213     current_space->local_remaining -= nzi;
214   }
215 #if defined(PETSC_USE_INFO)
216   if (ai[n] != 0) {
217     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
218     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
219     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
220     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
221     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
222   } else {
223     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
224   }
225 #endif
226 
227   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
228   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
229 
230   /* destroy list of free space and other temporary array(s) */
231   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
232   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
233   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
234   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
235 
236   /* put together the new matrix */
237   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
238   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
239   b    = (Mat_SeqAIJ*)(B)->data;
240 
241   b->free_a       = PETSC_TRUE;
242   b->free_ij      = PETSC_TRUE;
243   b->singlemalloc = PETSC_FALSE;
244 
245   ierr    = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr);
246   b->j    = bj;
247   b->i    = bi;
248   b->diag = bdiag;
249   b->ilen = 0;
250   b->imax = 0;
251   b->row  = isrow;
252   b->col  = iscol;
253   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
254   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
255   b->icol = isicol;
256   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
257 
258   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
259   ierr     = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
260   b->maxnz = b->nz = bi[n];
261 
262   (B)->factortype            = MAT_FACTOR_LU;
263   (B)->info.factor_mallocs   = reallocs;
264   (B)->info.fill_ratio_given = f;
265 
266   if (ai[n]) {
267     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
268   } else {
269     (B)->info.fill_ratio_needed = 0.0;
270   }
271   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
272   if (a->inode.size) {
273     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
280 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
281 {
282   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
283   IS                 isicol;
284   PetscErrorCode     ierr;
285   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
286   PetscInt           i,n=A->rmap->n;
287   PetscInt           *bi,*bj;
288   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
289   PetscReal          f;
290   PetscInt           nlnk,*lnk,k,**bi_ptr;
291   PetscFreeSpaceList free_space=NULL,current_space=NULL;
292   PetscBT            lnkbt;
293   PetscBool          missing;
294 
295   PetscFunctionBegin;
296   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
297   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
298   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
299 
300   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
301   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
302   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
303 
304   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
305   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
306   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
307   bi[0] = bdiag[0] = 0;
308 
309   /* linked list for storing column indices of the active row */
310   nlnk = n + 1;
311   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
312 
313   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
314 
315   /* initial FreeSpace size is f*(ai[n]+1) */
316   f             = info->fill;
317   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
318   current_space = free_space;
319 
320   for (i=0; i<n; i++) {
321     /* copy previous fill into linked list */
322     nzi = 0;
323     nnz = ai[r[i]+1] - ai[r[i]];
324     ajtmp = aj + ai[r[i]];
325     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
326     nzi  += nlnk;
327 
328     /* add pivot rows into linked list */
329     row = lnk[n];
330     while (row < i) {
331       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
332       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
333       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
334       nzi  += nlnk;
335       row   = lnk[row];
336     }
337     bi[i+1] = bi[i] + nzi;
338     im[i]   = nzi;
339 
340     /* mark bdiag */
341     nzbd = 0;
342     nnz  = nzi;
343     k    = lnk[n];
344     while (nnz-- && k < i) {
345       nzbd++;
346       k = lnk[k];
347     }
348     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
349 
350     /* if free space is not available, make more free space */
351     if (current_space->local_remaining<nzi) {
352       /* estimated additional space needed */
353       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
354       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
355       reallocs++;
356     }
357 
358     /* copy data into free space, then initialize lnk */
359     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
360 
361     bi_ptr[i]                       = current_space->array;
362     current_space->array           += nzi;
363     current_space->local_used      += nzi;
364     current_space->local_remaining -= nzi;
365   }
366 
367   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
368   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
369 
370   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
371   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
372   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
374   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
375 
376   /* put together the new matrix */
377   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
378   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
379   b    = (Mat_SeqAIJ*)(B)->data;
380 
381   b->free_a       = PETSC_TRUE;
382   b->free_ij      = PETSC_TRUE;
383   b->singlemalloc = PETSC_FALSE;
384 
385   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
386 
387   b->j    = bj;
388   b->i    = bi;
389   b->diag = bdiag;
390   b->ilen = 0;
391   b->imax = 0;
392   b->row  = isrow;
393   b->col  = iscol;
394   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
395   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
396   b->icol = isicol;
397   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
398 
399   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
400   ierr     = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
401   b->maxnz = b->nz = bdiag[0]+1;
402 
403   B->factortype            = MAT_FACTOR_LU;
404   B->info.factor_mallocs   = reallocs;
405   B->info.fill_ratio_given = f;
406 
407   if (ai[n]) {
408     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
409   } else {
410     B->info.fill_ratio_needed = 0.0;
411   }
412 #if defined(PETSC_USE_INFO)
413   if (ai[n] != 0) {
414     PetscReal af = B->info.fill_ratio_needed;
415     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
416     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
417     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
418     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
419   } else {
420     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
421   }
422 #endif
423   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
424   if (a->inode.size) {
425     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
426   }
427   ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 /*
432     Trouble in factorization, should we dump the original matrix?
433 */
434 #undef __FUNCT__
435 #define __FUNCT__ "MatFactorDumpMatrix"
436 PetscErrorCode MatFactorDumpMatrix(Mat A)
437 {
438   PetscErrorCode ierr;
439   PetscBool      flg = PETSC_FALSE;
440 
441   PetscFunctionBegin;
442   ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr);
443   if (flg) {
444     PetscViewer viewer;
445     char        filename[PETSC_MAX_PATH_LEN];
446 
447     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
448     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
449     ierr = MatView(A,viewer);CHKERRQ(ierr);
450     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
451   }
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
457 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
458 {
459   Mat             C     =B;
460   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
461   IS              isrow = b->row,isicol = b->icol;
462   PetscErrorCode  ierr;
463   const PetscInt  *r,*ic,*ics;
464   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
465   PetscInt        i,j,k,nz,nzL,row,*pj;
466   const PetscInt  *ajtmp,*bjtmp;
467   MatScalar       *rtmp,*pc,multiplier,*pv;
468   const MatScalar *aa=a->a,*v;
469   PetscBool       row_identity,col_identity;
470   FactorShiftCtx  sctx;
471   const PetscInt  *ddiag;
472   PetscReal       rs;
473   MatScalar       d;
474 
475   PetscFunctionBegin;
476   /* MatPivotSetUp(): initialize shift context sctx */
477   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
478 
479   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
480     ddiag          = a->diag;
481     sctx.shift_top = info->zeropivot;
482     for (i=0; i<n; i++) {
483       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
484       d  = (aa)[ddiag[i]];
485       rs = -PetscAbsScalar(d) - PetscRealPart(d);
486       v  = aa+ai[i];
487       nz = ai[i+1] - ai[i];
488       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
489       if (rs>sctx.shift_top) sctx.shift_top = rs;
490     }
491     sctx.shift_top *= 1.1;
492     sctx.nshift_max = 5;
493     sctx.shift_lo   = 0.;
494     sctx.shift_hi   = 1.;
495   }
496 
497   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
498   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
499   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
500   ics  = ic;
501 
502   do {
503     sctx.newshift = PETSC_FALSE;
504     for (i=0; i<n; i++) {
505       /* zero rtmp */
506       /* L part */
507       nz    = bi[i+1] - bi[i];
508       bjtmp = bj + bi[i];
509       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
510 
511       /* U part */
512       nz    = bdiag[i]-bdiag[i+1];
513       bjtmp = bj + bdiag[i+1]+1;
514       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
515 
516       /* load in initial (unfactored row) */
517       nz    = ai[r[i]+1] - ai[r[i]];
518       ajtmp = aj + ai[r[i]];
519       v     = aa + ai[r[i]];
520       for (j=0; j<nz; j++) {
521         rtmp[ics[ajtmp[j]]] = v[j];
522       }
523       /* ZeropivotApply() */
524       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
525 
526       /* elimination */
527       bjtmp = bj + bi[i];
528       row   = *bjtmp++;
529       nzL   = bi[i+1] - bi[i];
530       for (k=0; k < nzL; k++) {
531         pc = rtmp + row;
532         if (*pc != 0.0) {
533           pv         = b->a + bdiag[row];
534           multiplier = *pc * (*pv);
535           *pc        = multiplier;
536 
537           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
538           pv = b->a + bdiag[row+1]+1;
539           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
540 
541           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
543         }
544         row = *bjtmp++;
545       }
546 
547       /* finished row so stick it into b->a */
548       rs = 0.0;
549       /* L part */
550       pv = b->a + bi[i];
551       pj = b->j + bi[i];
552       nz = bi[i+1] - bi[i];
553       for (j=0; j<nz; j++) {
554         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
555       }
556 
557       /* U part */
558       pv = b->a + bdiag[i+1]+1;
559       pj = b->j + bdiag[i+1]+1;
560       nz = bdiag[i] - bdiag[i+1]-1;
561       for (j=0; j<nz; j++) {
562         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
563       }
564 
565       sctx.rs = rs;
566       sctx.pv = rtmp[i];
567       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
568       if (sctx.newshift) break; /* break for-loop */
569       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
570 
571       /* Mark diagonal and invert diagonal for simplier triangular solves */
572       pv  = b->a + bdiag[i];
573       *pv = 1.0/rtmp[i];
574 
575     } /* endof for (i=0; i<n; i++) { */
576 
577     /* MatPivotRefine() */
578     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
579       /*
580        * if no shift in this attempt & shifting & started shifting & can refine,
581        * then try lower shift
582        */
583       sctx.shift_hi       = sctx.shift_fraction;
584       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
585       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
586       sctx.newshift       = PETSC_TRUE;
587       sctx.nshift++;
588     }
589   } while (sctx.newshift);
590 
591   ierr = PetscFree(rtmp);CHKERRQ(ierr);
592   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
593   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
594 
595   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
596   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
597   if (b->inode.size) {
598     C->ops->solve = MatSolve_SeqAIJ_Inode;
599   } else if (row_identity && col_identity) {
600     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
601   } else {
602     C->ops->solve = MatSolve_SeqAIJ;
603   }
604   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
605   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
606   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
607   C->ops->matsolve          = MatMatSolve_SeqAIJ;
608   C->assembled              = PETSC_TRUE;
609   C->preallocated           = PETSC_TRUE;
610 
611   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
612 
613   /* MatShiftView(A,info,&sctx) */
614   if (sctx.nshift) {
615     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
616       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
617     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
618       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
619     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
620       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
628 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
629 {
630   Mat             C     =B;
631   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
632   IS              isrow = b->row,isicol = b->icol;
633   PetscErrorCode  ierr;
634   const PetscInt  *r,*ic,*ics;
635   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
636   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
637   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
638   MatScalar       *pv,*rtmp,*pc,multiplier,d;
639   const MatScalar *v,*aa=a->a;
640   PetscReal       rs=0.0;
641   FactorShiftCtx  sctx;
642   const PetscInt  *ddiag;
643   PetscBool       row_identity, col_identity;
644 
645   PetscFunctionBegin;
646   /* MatPivotSetUp(): initialize shift context sctx */
647   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
648 
649   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
650     ddiag          = a->diag;
651     sctx.shift_top = info->zeropivot;
652     for (i=0; i<n; i++) {
653       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
654       d  = (aa)[ddiag[i]];
655       rs = -PetscAbsScalar(d) - PetscRealPart(d);
656       v  = aa+ai[i];
657       nz = ai[i+1] - ai[i];
658       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
659       if (rs>sctx.shift_top) sctx.shift_top = rs;
660     }
661     sctx.shift_top *= 1.1;
662     sctx.nshift_max = 5;
663     sctx.shift_lo   = 0.;
664     sctx.shift_hi   = 1.;
665   }
666 
667   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
668   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
669   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
670   ics  = ic;
671 
672   do {
673     sctx.newshift = PETSC_FALSE;
674     for (i=0; i<n; i++) {
675       nz    = bi[i+1] - bi[i];
676       bjtmp = bj + bi[i];
677       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
678 
679       /* load in initial (unfactored row) */
680       nz    = ai[r[i]+1] - ai[r[i]];
681       ajtmp = aj + ai[r[i]];
682       v     = aa + ai[r[i]];
683       for (j=0; j<nz; j++) {
684         rtmp[ics[ajtmp[j]]] = v[j];
685       }
686       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
687 
688       row = *bjtmp++;
689       while  (row < i) {
690         pc = rtmp + row;
691         if (*pc != 0.0) {
692           pv         = b->a + diag_offset[row];
693           pj         = b->j + diag_offset[row] + 1;
694           multiplier = *pc / *pv++;
695           *pc        = multiplier;
696           nz         = bi[row+1] - diag_offset[row] - 1;
697           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
698           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
699         }
700         row = *bjtmp++;
701       }
702       /* finished row so stick it into b->a */
703       pv   = b->a + bi[i];
704       pj   = b->j + bi[i];
705       nz   = bi[i+1] - bi[i];
706       diag = diag_offset[i] - bi[i];
707       rs   = 0.0;
708       for (j=0; j<nz; j++) {
709         pv[j] = rtmp[pj[j]];
710         rs   += PetscAbsScalar(pv[j]);
711       }
712       rs -= PetscAbsScalar(pv[diag]);
713 
714       sctx.rs = rs;
715       sctx.pv = pv[diag];
716       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
717       if (sctx.newshift) break;
718       pv[diag] = sctx.pv;
719     }
720 
721     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
722       /*
723        * if no shift in this attempt & shifting & started shifting & can refine,
724        * then try lower shift
725        */
726       sctx.shift_hi       = sctx.shift_fraction;
727       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
728       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
729       sctx.newshift       = PETSC_TRUE;
730       sctx.nshift++;
731     }
732   } while (sctx.newshift);
733 
734   /* invert diagonal entries for simplier triangular solves */
735   for (i=0; i<n; i++) {
736     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
737   }
738   ierr = PetscFree(rtmp);CHKERRQ(ierr);
739   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
740   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
741 
742   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
743   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
744   if (row_identity && col_identity) {
745     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
746   } else {
747     C->ops->solve = MatSolve_SeqAIJ_inplace;
748   }
749   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
750   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
751   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
752   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
753 
754   C->assembled    = PETSC_TRUE;
755   C->preallocated = PETSC_TRUE;
756 
757   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
758   if (sctx.nshift) {
759     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
760       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
761     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
762       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
763     }
764   }
765   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
766   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
767 
768   ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 /*
773    This routine implements inplace ILU(0) with row or/and column permutations.
774    Input:
775      A - original matrix
776    Output;
777      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
778          a->j (col index) is permuted by the inverse of colperm, then sorted
779          a->a reordered accordingly with a->j
780          a->diag (ptr to diagonal elements) is updated.
781 */
782 #undef __FUNCT__
783 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
784 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
785 {
786   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
787   IS              isrow = a->row,isicol = a->icol;
788   PetscErrorCode  ierr;
789   const PetscInt  *r,*ic,*ics;
790   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
791   PetscInt        *ajtmp,nz,row;
792   PetscInt        *diag = a->diag,nbdiag,*pj;
793   PetscScalar     *rtmp,*pc,multiplier,d;
794   MatScalar       *pv,*v;
795   PetscReal       rs;
796   FactorShiftCtx  sctx;
797   const MatScalar *aa=a->a,*vtmp;
798 
799   PetscFunctionBegin;
800   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
801 
802   /* MatPivotSetUp(): initialize shift context sctx */
803   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
804 
805   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
806     const PetscInt *ddiag = a->diag;
807     sctx.shift_top = info->zeropivot;
808     for (i=0; i<n; i++) {
809       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
810       d    = (aa)[ddiag[i]];
811       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
812       vtmp = aa+ai[i];
813       nz   = ai[i+1] - ai[i];
814       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
815       if (rs>sctx.shift_top) sctx.shift_top = rs;
816     }
817     sctx.shift_top *= 1.1;
818     sctx.nshift_max = 5;
819     sctx.shift_lo   = 0.;
820     sctx.shift_hi   = 1.;
821   }
822 
823   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
824   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
825   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
826   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
827   ics  = ic;
828 
829 #if defined(MV)
830   sctx.shift_top      = 0.;
831   sctx.nshift_max     = 0;
832   sctx.shift_lo       = 0.;
833   sctx.shift_hi       = 0.;
834   sctx.shift_fraction = 0.;
835 
836   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
837     sctx.shift_top = 0.;
838     for (i=0; i<n; i++) {
839       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
840       d  = (a->a)[diag[i]];
841       rs = -PetscAbsScalar(d) - PetscRealPart(d);
842       v  = a->a+ai[i];
843       nz = ai[i+1] - ai[i];
844       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
845       if (rs>sctx.shift_top) sctx.shift_top = rs;
846     }
847     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
848     sctx.shift_top *= 1.1;
849     sctx.nshift_max = 5;
850     sctx.shift_lo   = 0.;
851     sctx.shift_hi   = 1.;
852   }
853 
854   sctx.shift_amount = 0.;
855   sctx.nshift       = 0;
856 #endif
857 
858   do {
859     sctx.newshift = PETSC_FALSE;
860     for (i=0; i<n; i++) {
861       /* load in initial unfactored row */
862       nz    = ai[r[i]+1] - ai[r[i]];
863       ajtmp = aj + ai[r[i]];
864       v     = a->a + ai[r[i]];
865       /* sort permuted ajtmp and values v accordingly */
866       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
867       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
868 
869       diag[r[i]] = ai[r[i]];
870       for (j=0; j<nz; j++) {
871         rtmp[ajtmp[j]] = v[j];
872         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
873       }
874       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
875 
876       row = *ajtmp++;
877       while  (row < i) {
878         pc = rtmp + row;
879         if (*pc != 0.0) {
880           pv = a->a + diag[r[row]];
881           pj = aj + diag[r[row]] + 1;
882 
883           multiplier = *pc / *pv++;
884           *pc        = multiplier;
885           nz         = ai[r[row]+1] - diag[r[row]] - 1;
886           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
887           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
888         }
889         row = *ajtmp++;
890       }
891       /* finished row so overwrite it onto a->a */
892       pv     = a->a + ai[r[i]];
893       pj     = aj + ai[r[i]];
894       nz     = ai[r[i]+1] - ai[r[i]];
895       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
896 
897       rs = 0.0;
898       for (j=0; j<nz; j++) {
899         pv[j] = rtmp[pj[j]];
900         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
901       }
902 
903       sctx.rs = rs;
904       sctx.pv = pv[nbdiag];
905       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
906       if (sctx.newshift) break;
907       pv[nbdiag] = sctx.pv;
908     }
909 
910     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
911       /*
912        * if no shift in this attempt & shifting & started shifting & can refine,
913        * then try lower shift
914        */
915       sctx.shift_hi       = sctx.shift_fraction;
916       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
917       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
918       sctx.newshift       = PETSC_TRUE;
919       sctx.nshift++;
920     }
921   } while (sctx.newshift);
922 
923   /* invert diagonal entries for simplier triangular solves */
924   for (i=0; i<n; i++) {
925     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
926   }
927 
928   ierr = PetscFree(rtmp);CHKERRQ(ierr);
929   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
930   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
931 
932   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
933   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
934   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
935   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
936 
937   A->assembled    = PETSC_TRUE;
938   A->preallocated = PETSC_TRUE;
939 
940   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
941   if (sctx.nshift) {
942     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
943       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
944     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
945       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
946     }
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 /* ----------------------------------------------------------- */
952 #undef __FUNCT__
953 #define __FUNCT__ "MatLUFactor_SeqAIJ"
954 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
955 {
956   PetscErrorCode ierr;
957   Mat            C;
958 
959   PetscFunctionBegin;
960   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
961   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
962   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
963 
964   A->ops->solve          = C->ops->solve;
965   A->ops->solvetranspose = C->ops->solvetranspose;
966 
967   ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
968   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 /* ----------------------------------------------------------- */
972 
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
976 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
977 {
978   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
979   IS                iscol = a->col,isrow = a->row;
980   PetscErrorCode    ierr;
981   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
982   PetscInt          nz;
983   const PetscInt    *rout,*cout,*r,*c;
984   PetscScalar       *x,*tmp,*tmps,sum;
985   const PetscScalar *b;
986   const MatScalar   *aa = a->a,*v;
987 
988   PetscFunctionBegin;
989   if (!n) PetscFunctionReturn(0);
990 
991   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
992   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
993   tmp  = a->solve_work;
994 
995   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
996   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
997 
998   /* forward solve the lower triangular */
999   tmp[0] = b[*r++];
1000   tmps   = tmp;
1001   for (i=1; i<n; i++) {
1002     v   = aa + ai[i];
1003     vi  = aj + ai[i];
1004     nz  = a->diag[i] - ai[i];
1005     sum = b[*r++];
1006     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1007     tmp[i] = sum;
1008   }
1009 
1010   /* backward solve the upper triangular */
1011   for (i=n-1; i>=0; i--) {
1012     v   = aa + a->diag[i] + 1;
1013     vi  = aj + a->diag[i] + 1;
1014     nz  = ai[i+1] - a->diag[i] - 1;
1015     sum = tmp[i];
1016     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1017     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1018   }
1019 
1020   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1021   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1022   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1023   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1024   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1030 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1031 {
1032   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1033   IS              iscol = a->col,isrow = a->row;
1034   PetscErrorCode  ierr;
1035   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1036   PetscInt        nz,neq;
1037   const PetscInt  *rout,*cout,*r,*c;
1038   PetscScalar     *x,*b,*tmp,*tmps,sum;
1039   const MatScalar *aa = a->a,*v;
1040   PetscBool       bisdense,xisdense;
1041 
1042   PetscFunctionBegin;
1043   if (!n) PetscFunctionReturn(0);
1044 
1045   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1046   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1047   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1048   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1049 
1050   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1051   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1052 
1053   tmp  = a->solve_work;
1054   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1055   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1056 
1057   for (neq=0; neq<B->cmap->n; neq++) {
1058     /* forward solve the lower triangular */
1059     tmp[0] = b[r[0]];
1060     tmps   = tmp;
1061     for (i=1; i<n; i++) {
1062       v   = aa + ai[i];
1063       vi  = aj + ai[i];
1064       nz  = a->diag[i] - ai[i];
1065       sum = b[r[i]];
1066       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1067       tmp[i] = sum;
1068     }
1069     /* backward solve the upper triangular */
1070     for (i=n-1; i>=0; i--) {
1071       v   = aa + a->diag[i] + 1;
1072       vi  = aj + a->diag[i] + 1;
1073       nz  = ai[i+1] - a->diag[i] - 1;
1074       sum = tmp[i];
1075       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1076       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1077     }
1078 
1079     b += n;
1080     x += n;
1081   }
1082   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1083   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1084   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1085   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1086   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1092 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1093 {
1094   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1095   IS              iscol = a->col,isrow = a->row;
1096   PetscErrorCode  ierr;
1097   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1098   PetscInt        nz,neq;
1099   const PetscInt  *rout,*cout,*r,*c;
1100   PetscScalar     *x,*b,*tmp,sum;
1101   const MatScalar *aa = a->a,*v;
1102   PetscBool       bisdense,xisdense;
1103 
1104   PetscFunctionBegin;
1105   if (!n) PetscFunctionReturn(0);
1106 
1107   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1108   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1109   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1110   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1111 
1112   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1113   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1114 
1115   tmp  = a->solve_work;
1116   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1117   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1118 
1119   for (neq=0; neq<B->cmap->n; neq++) {
1120     /* forward solve the lower triangular */
1121     tmp[0] = b[r[0]];
1122     v      = aa;
1123     vi     = aj;
1124     for (i=1; i<n; i++) {
1125       nz  = ai[i+1] - ai[i];
1126       sum = b[r[i]];
1127       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1128       tmp[i] = sum;
1129       v     += nz; vi += nz;
1130     }
1131 
1132     /* backward solve the upper triangular */
1133     for (i=n-1; i>=0; i--) {
1134       v   = aa + adiag[i+1]+1;
1135       vi  = aj + adiag[i+1]+1;
1136       nz  = adiag[i]-adiag[i+1]-1;
1137       sum = tmp[i];
1138       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1139       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1140     }
1141 
1142     b += n;
1143     x += n;
1144   }
1145   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1146   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1147   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1148   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1149   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1155 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1156 {
1157   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1158   IS              iscol = a->col,isrow = a->row;
1159   PetscErrorCode  ierr;
1160   const PetscInt  *r,*c,*rout,*cout;
1161   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1162   PetscInt        nz,row;
1163   PetscScalar     *x,*b,*tmp,*tmps,sum;
1164   const MatScalar *aa = a->a,*v;
1165 
1166   PetscFunctionBegin;
1167   if (!n) PetscFunctionReturn(0);
1168 
1169   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1170   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1171   tmp  = a->solve_work;
1172 
1173   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1174   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1175 
1176   /* forward solve the lower triangular */
1177   tmp[0] = b[*r++];
1178   tmps   = tmp;
1179   for (row=1; row<n; row++) {
1180     i   = rout[row]; /* permuted row */
1181     v   = aa + ai[i];
1182     vi  = aj + ai[i];
1183     nz  = a->diag[i] - ai[i];
1184     sum = b[*r++];
1185     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1186     tmp[row] = sum;
1187   }
1188 
1189   /* backward solve the upper triangular */
1190   for (row=n-1; row>=0; row--) {
1191     i   = rout[row]; /* permuted row */
1192     v   = aa + a->diag[i] + 1;
1193     vi  = aj + a->diag[i] + 1;
1194     nz  = ai[i+1] - a->diag[i] - 1;
1195     sum = tmp[row];
1196     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1197     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1198   }
1199 
1200   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1201   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1202   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1203   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1204   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 /* ----------------------------------------------------------- */
1209 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1212 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1213 {
1214   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1215   PetscErrorCode    ierr;
1216   PetscInt          n   = A->rmap->n;
1217   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1218   PetscScalar       *x;
1219   const PetscScalar *b;
1220   const MatScalar   *aa = a->a;
1221 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1222   PetscInt        adiag_i,i,nz,ai_i;
1223   const PetscInt  *vi;
1224   const MatScalar *v;
1225   PetscScalar     sum;
1226 #endif
1227 
1228   PetscFunctionBegin;
1229   if (!n) PetscFunctionReturn(0);
1230 
1231   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1232   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1233 
1234 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1235   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1236 #else
1237   /* forward solve the lower triangular */
1238   x[0] = b[0];
1239   for (i=1; i<n; i++) {
1240     ai_i = ai[i];
1241     v    = aa + ai_i;
1242     vi   = aj + ai_i;
1243     nz   = adiag[i] - ai_i;
1244     sum  = b[i];
1245     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1246     x[i] = sum;
1247   }
1248 
1249   /* backward solve the upper triangular */
1250   for (i=n-1; i>=0; i--) {
1251     adiag_i = adiag[i];
1252     v       = aa + adiag_i + 1;
1253     vi      = aj + adiag_i + 1;
1254     nz      = ai[i+1] - adiag_i - 1;
1255     sum     = x[i];
1256     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1257     x[i] = sum*aa[adiag_i];
1258   }
1259 #endif
1260   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1261   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1262   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1268 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1269 {
1270   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1271   IS                iscol = a->col,isrow = a->row;
1272   PetscErrorCode    ierr;
1273   PetscInt          i, n = A->rmap->n,j;
1274   PetscInt          nz;
1275   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1276   PetscScalar       *x,*tmp,sum;
1277   const PetscScalar *b;
1278   const MatScalar   *aa = a->a,*v;
1279 
1280   PetscFunctionBegin;
1281   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1282 
1283   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1284   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1285   tmp  = a->solve_work;
1286 
1287   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1288   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1289 
1290   /* forward solve the lower triangular */
1291   tmp[0] = b[*r++];
1292   for (i=1; i<n; i++) {
1293     v   = aa + ai[i];
1294     vi  = aj + ai[i];
1295     nz  = a->diag[i] - ai[i];
1296     sum = b[*r++];
1297     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1298     tmp[i] = sum;
1299   }
1300 
1301   /* backward solve the upper triangular */
1302   for (i=n-1; i>=0; i--) {
1303     v   = aa + a->diag[i] + 1;
1304     vi  = aj + a->diag[i] + 1;
1305     nz  = ai[i+1] - a->diag[i] - 1;
1306     sum = tmp[i];
1307     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1308     tmp[i]   = sum*aa[a->diag[i]];
1309     x[*c--] += tmp[i];
1310   }
1311 
1312   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1313   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1314   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1315   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1316   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1322 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1323 {
1324   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1325   IS                iscol = a->col,isrow = a->row;
1326   PetscErrorCode    ierr;
1327   PetscInt          i, n = A->rmap->n,j;
1328   PetscInt          nz;
1329   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1330   PetscScalar       *x,*tmp,sum;
1331   const PetscScalar *b;
1332   const MatScalar   *aa = a->a,*v;
1333 
1334   PetscFunctionBegin;
1335   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1336 
1337   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1338   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1339   tmp  = a->solve_work;
1340 
1341   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1342   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1343 
1344   /* forward solve the lower triangular */
1345   tmp[0] = b[r[0]];
1346   v      = aa;
1347   vi     = aj;
1348   for (i=1; i<n; i++) {
1349     nz  = ai[i+1] - ai[i];
1350     sum = b[r[i]];
1351     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1352     tmp[i] = sum;
1353     v     += nz;
1354     vi    += nz;
1355   }
1356 
1357   /* backward solve the upper triangular */
1358   v  = aa + adiag[n-1];
1359   vi = aj + adiag[n-1];
1360   for (i=n-1; i>=0; i--) {
1361     nz  = adiag[i] - adiag[i+1] - 1;
1362     sum = tmp[i];
1363     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1364     tmp[i]   = sum*v[nz];
1365     x[c[i]] += tmp[i];
1366     v       += nz+1; vi += nz+1;
1367   }
1368 
1369   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1370   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1371   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1372   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1373   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1379 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1380 {
1381   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1382   IS                iscol = a->col,isrow = a->row;
1383   PetscErrorCode    ierr;
1384   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1385   PetscInt          i,n = A->rmap->n,j;
1386   PetscInt          nz;
1387   PetscScalar       *x,*tmp,s1;
1388   const MatScalar   *aa = a->a,*v;
1389   const PetscScalar *b;
1390 
1391   PetscFunctionBegin;
1392   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1393   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1394   tmp  = a->solve_work;
1395 
1396   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1397   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1398 
1399   /* copy the b into temp work space according to permutation */
1400   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1401 
1402   /* forward solve the U^T */
1403   for (i=0; i<n; i++) {
1404     v   = aa + diag[i];
1405     vi  = aj + diag[i] + 1;
1406     nz  = ai[i+1] - diag[i] - 1;
1407     s1  = tmp[i];
1408     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1409     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1410     tmp[i] = s1;
1411   }
1412 
1413   /* backward solve the L^T */
1414   for (i=n-1; i>=0; i--) {
1415     v  = aa + diag[i] - 1;
1416     vi = aj + diag[i] - 1;
1417     nz = diag[i] - ai[i];
1418     s1 = tmp[i];
1419     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1420   }
1421 
1422   /* copy tmp into x according to permutation */
1423   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1424 
1425   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1426   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1427   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1429 
1430   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1436 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1437 {
1438   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1439   IS                iscol = a->col,isrow = a->row;
1440   PetscErrorCode    ierr;
1441   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1442   PetscInt          i,n = A->rmap->n,j;
1443   PetscInt          nz;
1444   PetscScalar       *x,*tmp,s1;
1445   const MatScalar   *aa = a->a,*v;
1446   const PetscScalar *b;
1447 
1448   PetscFunctionBegin;
1449   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1450   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1451   tmp  = a->solve_work;
1452 
1453   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1454   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1455 
1456   /* copy the b into temp work space according to permutation */
1457   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1458 
1459   /* forward solve the U^T */
1460   for (i=0; i<n; i++) {
1461     v   = aa + adiag[i+1] + 1;
1462     vi  = aj + adiag[i+1] + 1;
1463     nz  = adiag[i] - adiag[i+1] - 1;
1464     s1  = tmp[i];
1465     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1466     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1467     tmp[i] = s1;
1468   }
1469 
1470   /* backward solve the L^T */
1471   for (i=n-1; i>=0; i--) {
1472     v  = aa + ai[i];
1473     vi = aj + ai[i];
1474     nz = ai[i+1] - ai[i];
1475     s1 = tmp[i];
1476     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1477   }
1478 
1479   /* copy tmp into x according to permutation */
1480   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1481 
1482   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1483   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1484   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1485   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1486 
1487   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1493 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1494 {
1495   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1496   IS                iscol = a->col,isrow = a->row;
1497   PetscErrorCode    ierr;
1498   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1499   PetscInt          i,n = A->rmap->n,j;
1500   PetscInt          nz;
1501   PetscScalar       *x,*tmp,s1;
1502   const MatScalar   *aa = a->a,*v;
1503   const PetscScalar *b;
1504 
1505   PetscFunctionBegin;
1506   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1507   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1508   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1509   tmp  = a->solve_work;
1510 
1511   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1512   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1513 
1514   /* copy the b into temp work space according to permutation */
1515   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1516 
1517   /* forward solve the U^T */
1518   for (i=0; i<n; i++) {
1519     v   = aa + diag[i];
1520     vi  = aj + diag[i] + 1;
1521     nz  = ai[i+1] - diag[i] - 1;
1522     s1  = tmp[i];
1523     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1524     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1525     tmp[i] = s1;
1526   }
1527 
1528   /* backward solve the L^T */
1529   for (i=n-1; i>=0; i--) {
1530     v  = aa + diag[i] - 1;
1531     vi = aj + diag[i] - 1;
1532     nz = diag[i] - ai[i];
1533     s1 = tmp[i];
1534     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1535   }
1536 
1537   /* copy tmp into x according to permutation */
1538   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1539 
1540   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1541   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1542   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1543   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1544 
1545   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1551 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1552 {
1553   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1554   IS                iscol = a->col,isrow = a->row;
1555   PetscErrorCode    ierr;
1556   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1557   PetscInt          i,n = A->rmap->n,j;
1558   PetscInt          nz;
1559   PetscScalar       *x,*tmp,s1;
1560   const MatScalar   *aa = a->a,*v;
1561   const PetscScalar *b;
1562 
1563   PetscFunctionBegin;
1564   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1565   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1566   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1567   tmp  = a->solve_work;
1568 
1569   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1570   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1571 
1572   /* copy the b into temp work space according to permutation */
1573   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1574 
1575   /* forward solve the U^T */
1576   for (i=0; i<n; i++) {
1577     v   = aa + adiag[i+1] + 1;
1578     vi  = aj + adiag[i+1] + 1;
1579     nz  = adiag[i] - adiag[i+1] - 1;
1580     s1  = tmp[i];
1581     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1582     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1583     tmp[i] = s1;
1584   }
1585 
1586 
1587   /* backward solve the L^T */
1588   for (i=n-1; i>=0; i--) {
1589     v  = aa + ai[i];
1590     vi = aj + ai[i];
1591     nz = ai[i+1] - ai[i];
1592     s1 = tmp[i];
1593     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1594   }
1595 
1596   /* copy tmp into x according to permutation */
1597   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1598 
1599   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1600   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1601   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1602   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1603 
1604   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 /* ----------------------------------------------------------------*/
1609 
1610 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1611 
1612 /*
1613    ilu() under revised new data structure.
1614    Factored arrays bj and ba are stored as
1615      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1616 
1617    bi=fact->i is an array of size n+1, in which
1618    bi+
1619      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1620      bi[n]:  points to L(n-1,n-1)+1
1621 
1622   bdiag=fact->diag is an array of size n+1,in which
1623      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1624      bdiag[n]: points to entry of U(n-1,0)-1
1625 
1626    U(i,:) contains bdiag[i] as its last entry, i.e.,
1627     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1628 */
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1631 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1632 {
1633   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1634   PetscErrorCode ierr;
1635   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1636   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1637   IS             isicol;
1638 
1639   PetscFunctionBegin;
1640   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1641   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1642   b    = (Mat_SeqAIJ*)(fact)->data;
1643 
1644   /* allocate matrix arrays for new data structure */
1645   ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr);
1646   ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1647 
1648   b->singlemalloc = PETSC_TRUE;
1649   if (!b->diag) {
1650     ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr);
1651     ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1652   }
1653   bdiag = b->diag;
1654 
1655   if (n > 0) {
1656     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1657   }
1658 
1659   /* set bi and bj with new data structure */
1660   bi = b->i;
1661   bj = b->j;
1662 
1663   /* L part */
1664   bi[0] = 0;
1665   for (i=0; i<n; i++) {
1666     nz      = adiag[i] - ai[i];
1667     bi[i+1] = bi[i] + nz;
1668     aj      = a->j + ai[i];
1669     for (j=0; j<nz; j++) {
1670       /*   *bj = aj[j]; bj++; */
1671       bj[k++] = aj[j];
1672     }
1673   }
1674 
1675   /* U part */
1676   bdiag[n] = bi[n]-1;
1677   for (i=n-1; i>=0; i--) {
1678     nz = ai[i+1] - adiag[i] - 1;
1679     aj = a->j + adiag[i] + 1;
1680     for (j=0; j<nz; j++) {
1681       /*      *bj = aj[j]; bj++; */
1682       bj[k++] = aj[j];
1683     }
1684     /* diag[i] */
1685     /*    *bj = i; bj++; */
1686     bj[k++]  = i;
1687     bdiag[i] = bdiag[i+1] + nz + 1;
1688   }
1689 
1690   fact->factortype             = MAT_FACTOR_ILU;
1691   fact->info.factor_mallocs    = 0;
1692   fact->info.fill_ratio_given  = info->fill;
1693   fact->info.fill_ratio_needed = 1.0;
1694   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1695   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1696 
1697   b       = (Mat_SeqAIJ*)(fact)->data;
1698   b->row  = isrow;
1699   b->col  = iscol;
1700   b->icol = isicol;
1701   ierr    = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1702   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1703   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1709 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1710 {
1711   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1712   IS                 isicol;
1713   PetscErrorCode     ierr;
1714   const PetscInt     *r,*ic;
1715   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1716   PetscInt           *bi,*cols,nnz,*cols_lvl;
1717   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1718   PetscInt           i,levels,diagonal_fill;
1719   PetscBool          col_identity,row_identity,missing;
1720   PetscReal          f;
1721   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1722   PetscBT            lnkbt;
1723   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1724   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1725   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1726 
1727   PetscFunctionBegin;
1728   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1729   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1730   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1731 
1732   levels = (PetscInt)info->levels;
1733   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1734   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1735   if (!levels && row_identity && col_identity) {
1736     /* special case: ilu(0) with natural ordering */
1737     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1738     if (a->inode.size) {
1739       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1740     }
1741     PetscFunctionReturn(0);
1742   }
1743 
1744   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1745   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1746   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1747 
1748   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1749   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1750   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1751   bi[0] = bdiag[0] = 0;
1752   ierr  = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1753 
1754   /* create a linked list for storing column indices of the active row */
1755   nlnk = n + 1;
1756   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1757 
1758   /* initial FreeSpace size is f*(ai[n]+1) */
1759   f                 = info->fill;
1760   diagonal_fill     = (PetscInt)info->diagonal_fill;
1761   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1762   current_space     = free_space;
1763   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1764   current_space_lvl = free_space_lvl;
1765   for (i=0; i<n; i++) {
1766     nzi = 0;
1767     /* copy current row into linked list */
1768     nnz = ai[r[i]+1] - ai[r[i]];
1769     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1770     cols   = aj + ai[r[i]];
1771     lnk[i] = -1; /* marker to indicate if diagonal exists */
1772     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1773     nzi   += nlnk;
1774 
1775     /* make sure diagonal entry is included */
1776     if (diagonal_fill && lnk[i] == -1) {
1777       fm = n;
1778       while (lnk[fm] < i) fm = lnk[fm];
1779       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1780       lnk[fm]    = i;
1781       lnk_lvl[i] = 0;
1782       nzi++; dcount++;
1783     }
1784 
1785     /* add pivot rows into the active row */
1786     nzbd = 0;
1787     prow = lnk[n];
1788     while (prow < i) {
1789       nnz      = bdiag[prow];
1790       cols     = bj_ptr[prow] + nnz + 1;
1791       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1792       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1793       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1794       nzi     += nlnk;
1795       prow     = lnk[prow];
1796       nzbd++;
1797     }
1798     bdiag[i] = nzbd;
1799     bi[i+1]  = bi[i] + nzi;
1800     /* if free space is not available, make more free space */
1801     if (current_space->local_remaining<nzi) {
1802       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1803       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1804       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1805       reallocs++;
1806     }
1807 
1808     /* copy data into free_space and free_space_lvl, then initialize lnk */
1809     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1810     bj_ptr[i]    = current_space->array;
1811     bjlvl_ptr[i] = current_space_lvl->array;
1812 
1813     /* make sure the active row i has diagonal entry */
1814     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1815 
1816     current_space->array               += nzi;
1817     current_space->local_used          += nzi;
1818     current_space->local_remaining     -= nzi;
1819     current_space_lvl->array           += nzi;
1820     current_space_lvl->local_used      += nzi;
1821     current_space_lvl->local_remaining -= nzi;
1822   }
1823 
1824   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1825   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1826   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1827   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
1828   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1829 
1830   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1831   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1832   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1833 
1834 #if defined(PETSC_USE_INFO)
1835   {
1836     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1837     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
1838     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
1839     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
1840     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1841     if (diagonal_fill) {
1842       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
1843     }
1844   }
1845 #endif
1846   /* put together the new matrix */
1847   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
1848   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
1849   b    = (Mat_SeqAIJ*)(fact)->data;
1850 
1851   b->free_a       = PETSC_TRUE;
1852   b->free_ij      = PETSC_TRUE;
1853   b->singlemalloc = PETSC_FALSE;
1854 
1855   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
1856 
1857   b->j    = bj;
1858   b->i    = bi;
1859   b->diag = bdiag;
1860   b->ilen = 0;
1861   b->imax = 0;
1862   b->row  = isrow;
1863   b->col  = iscol;
1864   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1865   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1866   b->icol = isicol;
1867 
1868   ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
1869   /* In b structure:  Free imax, ilen, old a, old j.
1870      Allocate bdiag, solve_work, new a, new j */
1871   ierr     = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1872   b->maxnz = b->nz = bdiag[0]+1;
1873 
1874   (fact)->info.factor_mallocs    = reallocs;
1875   (fact)->info.fill_ratio_given  = f;
1876   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1877   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1878   if (a->inode.size) {
1879     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1880   }
1881   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1887 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1888 {
1889   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1890   IS                 isicol;
1891   PetscErrorCode     ierr;
1892   const PetscInt     *r,*ic;
1893   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1894   PetscInt           *bi,*cols,nnz,*cols_lvl;
1895   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1896   PetscInt           i,levels,diagonal_fill;
1897   PetscBool          col_identity,row_identity;
1898   PetscReal          f;
1899   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1900   PetscBT            lnkbt;
1901   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1902   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1903   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1904   PetscBool          missing;
1905 
1906   PetscFunctionBegin;
1907   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1908   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1909   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1910 
1911   f             = info->fill;
1912   levels        = (PetscInt)info->levels;
1913   diagonal_fill = (PetscInt)info->diagonal_fill;
1914 
1915   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1916 
1917   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1918   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1919   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1920     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1921 
1922     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1923     if (a->inode.size) {
1924       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1925     }
1926     fact->factortype               = MAT_FACTOR_ILU;
1927     (fact)->info.factor_mallocs    = 0;
1928     (fact)->info.fill_ratio_given  = info->fill;
1929     (fact)->info.fill_ratio_needed = 1.0;
1930 
1931     b    = (Mat_SeqAIJ*)(fact)->data;
1932     b->row  = isrow;
1933     b->col  = iscol;
1934     b->icol = isicol;
1935     ierr    = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1936     ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1937     ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1938     PetscFunctionReturn(0);
1939   }
1940 
1941   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1942   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1943 
1944   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1945   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1946   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1947   bi[0] = bdiag[0] = 0;
1948 
1949   ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1950 
1951   /* create a linked list for storing column indices of the active row */
1952   nlnk = n + 1;
1953   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1954 
1955   /* initial FreeSpace size is f*(ai[n]+1) */
1956   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1957   current_space     = free_space;
1958   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1959   current_space_lvl = free_space_lvl;
1960 
1961   for (i=0; i<n; i++) {
1962     nzi = 0;
1963     /* copy current row into linked list */
1964     nnz = ai[r[i]+1] - ai[r[i]];
1965     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1966     cols   = aj + ai[r[i]];
1967     lnk[i] = -1; /* marker to indicate if diagonal exists */
1968     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1969     nzi   += nlnk;
1970 
1971     /* make sure diagonal entry is included */
1972     if (diagonal_fill && lnk[i] == -1) {
1973       fm = n;
1974       while (lnk[fm] < i) fm = lnk[fm];
1975       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1976       lnk[fm]    = i;
1977       lnk_lvl[i] = 0;
1978       nzi++; dcount++;
1979     }
1980 
1981     /* add pivot rows into the active row */
1982     nzbd = 0;
1983     prow = lnk[n];
1984     while (prow < i) {
1985       nnz      = bdiag[prow];
1986       cols     = bj_ptr[prow] + nnz + 1;
1987       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1988       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1989       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1990       nzi     += nlnk;
1991       prow     = lnk[prow];
1992       nzbd++;
1993     }
1994     bdiag[i] = nzbd;
1995     bi[i+1]  = bi[i] + nzi;
1996 
1997     /* if free space is not available, make more free space */
1998     if (current_space->local_remaining<nzi) {
1999       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
2000       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2001       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2002       reallocs++;
2003     }
2004 
2005     /* copy data into free_space and free_space_lvl, then initialize lnk */
2006     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2007     bj_ptr[i]    = current_space->array;
2008     bjlvl_ptr[i] = current_space_lvl->array;
2009 
2010     /* make sure the active row i has diagonal entry */
2011     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2012 
2013     current_space->array               += nzi;
2014     current_space->local_used          += nzi;
2015     current_space->local_remaining     -= nzi;
2016     current_space_lvl->array           += nzi;
2017     current_space_lvl->local_used      += nzi;
2018     current_space_lvl->local_remaining -= nzi;
2019   }
2020 
2021   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2022   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2023 
2024   /* destroy list of free space and other temporary arrays */
2025   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
2026   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2027   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2028   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2029   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2030 
2031 #if defined(PETSC_USE_INFO)
2032   {
2033     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2034     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
2035     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2036     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
2037     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2038     if (diagonal_fill) {
2039       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
2040     }
2041   }
2042 #endif
2043 
2044   /* put together the new matrix */
2045   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2046   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
2047   b    = (Mat_SeqAIJ*)(fact)->data;
2048 
2049   b->free_a       = PETSC_TRUE;
2050   b->free_ij      = PETSC_TRUE;
2051   b->singlemalloc = PETSC_FALSE;
2052 
2053   ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr);
2054   b->j = bj;
2055   b->i = bi;
2056   for (i=0; i<n; i++) bdiag[i] += bi[i];
2057   b->diag = bdiag;
2058   b->ilen = 0;
2059   b->imax = 0;
2060   b->row  = isrow;
2061   b->col  = iscol;
2062   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2063   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2064   b->icol = isicol;
2065   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
2066   /* In b structure:  Free imax, ilen, old a, old j.
2067      Allocate bdiag, solve_work, new a, new j */
2068   ierr     = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2069   b->maxnz = b->nz = bi[n];
2070 
2071   (fact)->info.factor_mallocs    = reallocs;
2072   (fact)->info.fill_ratio_given  = f;
2073   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2074   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2075   if (a->inode.size) {
2076     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2077   }
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 #undef __FUNCT__
2082 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2083 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2084 {
2085   Mat            C = B;
2086   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2087   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2088   IS             ip=b->row,iip = b->icol;
2089   PetscErrorCode ierr;
2090   const PetscInt *rip,*riip;
2091   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2092   PetscInt       *ai=a->i,*aj=a->j;
2093   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2094   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2095   PetscBool      perm_identity;
2096   FactorShiftCtx sctx;
2097   PetscReal      rs;
2098   MatScalar      d,*v;
2099 
2100   PetscFunctionBegin;
2101   /* MatPivotSetUp(): initialize shift context sctx */
2102   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2103 
2104   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2105     sctx.shift_top = info->zeropivot;
2106     for (i=0; i<mbs; i++) {
2107       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2108       d  = (aa)[a->diag[i]];
2109       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2110       v  = aa+ai[i];
2111       nz = ai[i+1] - ai[i];
2112       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2113       if (rs>sctx.shift_top) sctx.shift_top = rs;
2114     }
2115     sctx.shift_top *= 1.1;
2116     sctx.nshift_max = 5;
2117     sctx.shift_lo   = 0.;
2118     sctx.shift_hi   = 1.;
2119   }
2120 
2121   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2122   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2123 
2124   /* allocate working arrays
2125      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2126      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2127   */
2128   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr);
2129 
2130   do {
2131     sctx.newshift = PETSC_FALSE;
2132 
2133     for (i=0; i<mbs; i++) c2r[i] = mbs;
2134     if (mbs) il[0] = 0;
2135 
2136     for (k = 0; k<mbs; k++) {
2137       /* zero rtmp */
2138       nz    = bi[k+1] - bi[k];
2139       bjtmp = bj + bi[k];
2140       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2141 
2142       /* load in initial unfactored row */
2143       bval = ba + bi[k];
2144       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2145       for (j = jmin; j < jmax; j++) {
2146         col = riip[aj[j]];
2147         if (col >= k) { /* only take upper triangular entry */
2148           rtmp[col] = aa[j];
2149           *bval++   = 0.0; /* for in-place factorization */
2150         }
2151       }
2152       /* shift the diagonal of the matrix: ZeropivotApply() */
2153       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2154 
2155       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2156       dk = rtmp[k];
2157       i  = c2r[k]; /* first row to be added to k_th row  */
2158 
2159       while (i < k) {
2160         nexti = c2r[i]; /* next row to be added to k_th row */
2161 
2162         /* compute multiplier, update diag(k) and U(i,k) */
2163         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2164         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2165         dk     += uikdi*ba[ili]; /* update diag[k] */
2166         ba[ili] = uikdi; /* -U(i,k) */
2167 
2168         /* add multiple of row i to k-th row */
2169         jmin = ili + 1; jmax = bi[i+1];
2170         if (jmin < jmax) {
2171           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2172           /* update il and c2r for row i */
2173           il[i] = jmin;
2174           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2175         }
2176         i = nexti;
2177       }
2178 
2179       /* copy data into U(k,:) */
2180       rs   = 0.0;
2181       jmin = bi[k]; jmax = bi[k+1]-1;
2182       if (jmin < jmax) {
2183         for (j=jmin; j<jmax; j++) {
2184           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2185         }
2186         /* add the k-th row into il and c2r */
2187         il[k] = jmin;
2188         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2189       }
2190 
2191       /* MatPivotCheck() */
2192       sctx.rs = rs;
2193       sctx.pv = dk;
2194       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
2195       if (sctx.newshift) break;
2196       dk = sctx.pv;
2197 
2198       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2199     }
2200   } while (sctx.newshift);
2201 
2202   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2203   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2204   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2205 
2206   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2207   if (perm_identity) {
2208     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2209     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2210     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2211     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2212   } else {
2213     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2214     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2215     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2216     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2217   }
2218 
2219   C->assembled    = PETSC_TRUE;
2220   C->preallocated = PETSC_TRUE;
2221 
2222   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2223 
2224   /* MatPivotView() */
2225   if (sctx.nshift) {
2226     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2227       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
2228     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2229       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2230     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2231       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
2232     }
2233   }
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2239 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2240 {
2241   Mat            C = B;
2242   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2243   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2244   IS             ip=b->row,iip = b->icol;
2245   PetscErrorCode ierr;
2246   const PetscInt *rip,*riip;
2247   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2248   PetscInt       *ai=a->i,*aj=a->j;
2249   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2250   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2251   PetscBool      perm_identity;
2252   FactorShiftCtx sctx;
2253   PetscReal      rs;
2254   MatScalar      d,*v;
2255 
2256   PetscFunctionBegin;
2257   /* MatPivotSetUp(): initialize shift context sctx */
2258   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2259 
2260   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2261     sctx.shift_top = info->zeropivot;
2262     for (i=0; i<mbs; i++) {
2263       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2264       d  = (aa)[a->diag[i]];
2265       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2266       v  = aa+ai[i];
2267       nz = ai[i+1] - ai[i];
2268       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2269       if (rs>sctx.shift_top) sctx.shift_top = rs;
2270     }
2271     sctx.shift_top *= 1.1;
2272     sctx.nshift_max = 5;
2273     sctx.shift_lo   = 0.;
2274     sctx.shift_hi   = 1.;
2275   }
2276 
2277   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2278   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2279 
2280   /* initialization */
2281   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr);
2282 
2283   do {
2284     sctx.newshift = PETSC_FALSE;
2285 
2286     for (i=0; i<mbs; i++) jl[i] = mbs;
2287     il[0] = 0;
2288 
2289     for (k = 0; k<mbs; k++) {
2290       /* zero rtmp */
2291       nz    = bi[k+1] - bi[k];
2292       bjtmp = bj + bi[k];
2293       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2294 
2295       bval = ba + bi[k];
2296       /* initialize k-th row by the perm[k]-th row of A */
2297       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2298       for (j = jmin; j < jmax; j++) {
2299         col = riip[aj[j]];
2300         if (col >= k) { /* only take upper triangular entry */
2301           rtmp[col] = aa[j];
2302           *bval++   = 0.0; /* for in-place factorization */
2303         }
2304       }
2305       /* shift the diagonal of the matrix */
2306       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2307 
2308       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2309       dk = rtmp[k];
2310       i  = jl[k]; /* first row to be added to k_th row  */
2311 
2312       while (i < k) {
2313         nexti = jl[i]; /* next row to be added to k_th row */
2314 
2315         /* compute multiplier, update diag(k) and U(i,k) */
2316         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2317         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2318         dk     += uikdi*ba[ili];
2319         ba[ili] = uikdi; /* -U(i,k) */
2320 
2321         /* add multiple of row i to k-th row */
2322         jmin = ili + 1; jmax = bi[i+1];
2323         if (jmin < jmax) {
2324           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2325           /* update il and jl for row i */
2326           il[i] = jmin;
2327           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2328         }
2329         i = nexti;
2330       }
2331 
2332       /* shift the diagonals when zero pivot is detected */
2333       /* compute rs=sum of abs(off-diagonal) */
2334       rs   = 0.0;
2335       jmin = bi[k]+1;
2336       nz   = bi[k+1] - jmin;
2337       bcol = bj + jmin;
2338       for (j=0; j<nz; j++) {
2339         rs += PetscAbsScalar(rtmp[bcol[j]]);
2340       }
2341 
2342       sctx.rs = rs;
2343       sctx.pv = dk;
2344       ierr    = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr);
2345       if (sctx.newshift) break;
2346       dk = sctx.pv;
2347 
2348       /* copy data into U(k,:) */
2349       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2350       jmin      = bi[k]+1; jmax = bi[k+1];
2351       if (jmin < jmax) {
2352         for (j=jmin; j<jmax; j++) {
2353           col = bj[j]; ba[j] = rtmp[col];
2354         }
2355         /* add the k-th row into il and jl */
2356         il[k] = jmin;
2357         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2358       }
2359     }
2360   } while (sctx.newshift);
2361 
2362   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2363   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2364   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2365 
2366   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2367   if (perm_identity) {
2368     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2372   } else {
2373     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2374     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2375     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2376     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2377   }
2378 
2379   C->assembled    = PETSC_TRUE;
2380   C->preallocated = PETSC_TRUE;
2381 
2382   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2383   if (sctx.nshift) {
2384     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2385       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2386     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2387       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2388     }
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 /*
2394    icc() under revised new data structure.
2395    Factored arrays bj and ba are stored as
2396      U(0,:),...,U(i,:),U(n-1,:)
2397 
2398    ui=fact->i is an array of size n+1, in which
2399    ui+
2400      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2401      ui[n]:  points to U(n-1,n-1)+1
2402 
2403   udiag=fact->diag is an array of size n,in which
2404      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2405 
2406    U(i,:) contains udiag[i] as its last entry, i.e.,
2407     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2408 */
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2412 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2413 {
2414   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2415   Mat_SeqSBAIJ       *b;
2416   PetscErrorCode     ierr;
2417   PetscBool          perm_identity,missing;
2418   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2419   const PetscInt     *rip,*riip;
2420   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2421   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2422   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2423   PetscReal          fill          =info->fill,levels=info->levels;
2424   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2425   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2426   PetscBT            lnkbt;
2427   IS                 iperm;
2428 
2429   PetscFunctionBegin;
2430   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2431   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2432   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2433   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2434   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2435 
2436   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2437   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2438   ui[0] = 0;
2439 
2440   /* ICC(0) without matrix ordering: simply rearrange column indices */
2441   if (!levels && perm_identity) {
2442     for (i=0; i<am; i++) {
2443       ncols    = ai[i+1] - a->diag[i];
2444       ui[i+1]  = ui[i] + ncols;
2445       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2446     }
2447     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2448     cols = uj;
2449     for (i=0; i<am; i++) {
2450       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2451       ncols = ai[i+1] - a->diag[i] -1;
2452       for (j=0; j<ncols; j++) *cols++ = aj[j];
2453       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2454     }
2455   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2456     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2457     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2458 
2459     /* initialization */
2460     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2461 
2462     /* jl: linked list for storing indices of the pivot rows
2463        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2464     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2465     for (i=0; i<am; i++) {
2466       jl[i] = am; il[i] = 0;
2467     }
2468 
2469     /* create and initialize a linked list for storing column indices of the active row k */
2470     nlnk = am + 1;
2471     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2472 
2473     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2474     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2475     current_space     = free_space;
2476     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2477     current_space_lvl = free_space_lvl;
2478 
2479     for (k=0; k<am; k++) {  /* for each active row k */
2480       /* initialize lnk by the column indices of row rip[k] of A */
2481       nzk   = 0;
2482       ncols = ai[rip[k]+1] - ai[rip[k]];
2483       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2484       ncols_upper = 0;
2485       for (j=0; j<ncols; j++) {
2486         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2487         if (riip[i] >= k) { /* only take upper triangular entry */
2488           ajtmp[ncols_upper] = i;
2489           ncols_upper++;
2490         }
2491       }
2492       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2493       nzk += nlnk;
2494 
2495       /* update lnk by computing fill-in for each pivot row to be merged in */
2496       prow = jl[k]; /* 1st pivot row */
2497 
2498       while (prow < k) {
2499         nextprow = jl[prow];
2500 
2501         /* merge prow into k-th row */
2502         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2503         jmax  = ui[prow+1];
2504         ncols = jmax-jmin;
2505         i     = jmin - ui[prow];
2506         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2507         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2508         j     = *(uj - 1);
2509         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2510         nzk  += nlnk;
2511 
2512         /* update il and jl for prow */
2513         if (jmin < jmax) {
2514           il[prow] = jmin;
2515           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2516         }
2517         prow = nextprow;
2518       }
2519 
2520       /* if free space is not available, make more free space */
2521       if (current_space->local_remaining<nzk) {
2522         i    = am - k + 1; /* num of unfactored rows */
2523         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2524         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2525         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2526         reallocs++;
2527       }
2528 
2529       /* copy data into free_space and free_space_lvl, then initialize lnk */
2530       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2531       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2532 
2533       /* add the k-th row into il and jl */
2534       if (nzk > 1) {
2535         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2536         jl[k] = jl[i]; jl[i] = k;
2537         il[k] = ui[k] + 1;
2538       }
2539       uj_ptr[k]     = current_space->array;
2540       uj_lvl_ptr[k] = current_space_lvl->array;
2541 
2542       current_space->array           += nzk;
2543       current_space->local_used      += nzk;
2544       current_space->local_remaining -= nzk;
2545 
2546       current_space_lvl->array           += nzk;
2547       current_space_lvl->local_used      += nzk;
2548       current_space_lvl->local_remaining -= nzk;
2549 
2550       ui[k+1] = ui[k] + nzk;
2551     }
2552 
2553     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2554     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2555     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2556     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2557 
2558     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2559     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2560     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2561     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2562     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2563 
2564   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2565 
2566   /* put together the new matrix in MATSEQSBAIJ format */
2567   b               = (Mat_SeqSBAIJ*)(fact)->data;
2568   b->singlemalloc = PETSC_FALSE;
2569 
2570   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2571 
2572   b->j             = uj;
2573   b->i             = ui;
2574   b->diag          = udiag;
2575   b->free_diag     = PETSC_TRUE;
2576   b->ilen          = 0;
2577   b->imax          = 0;
2578   b->row           = perm;
2579   b->col           = perm;
2580   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2581   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2582   b->icol          = iperm;
2583   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2584 
2585   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2586   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2587 
2588   b->maxnz   = b->nz = ui[am];
2589   b->free_a  = PETSC_TRUE;
2590   b->free_ij = PETSC_TRUE;
2591 
2592   fact->info.factor_mallocs   = reallocs;
2593   fact->info.fill_ratio_given = fill;
2594   if (ai[am] != 0) {
2595     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2596     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2597   } else {
2598     fact->info.fill_ratio_needed = 0.0;
2599   }
2600 #if defined(PETSC_USE_INFO)
2601   if (ai[am] != 0) {
2602     PetscReal af = fact->info.fill_ratio_needed;
2603     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2604     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2605     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2606   } else {
2607     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2608   }
2609 #endif
2610   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2616 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2617 {
2618   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2619   Mat_SeqSBAIJ       *b;
2620   PetscErrorCode     ierr;
2621   PetscBool          perm_identity,missing;
2622   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2623   const PetscInt     *rip,*riip;
2624   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2625   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2626   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2627   PetscReal          fill          =info->fill,levels=info->levels;
2628   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2629   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2630   PetscBT            lnkbt;
2631   IS                 iperm;
2632 
2633   PetscFunctionBegin;
2634   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2635   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2636   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2637   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2638   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2639 
2640   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2641   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2642   ui[0] = 0;
2643 
2644   /* ICC(0) without matrix ordering: simply copies fill pattern */
2645   if (!levels && perm_identity) {
2646 
2647     for (i=0; i<am; i++) {
2648       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2649       udiag[i] = ui[i];
2650     }
2651     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2652     cols = uj;
2653     for (i=0; i<am; i++) {
2654       aj    = a->j + a->diag[i];
2655       ncols = ui[i+1] - ui[i];
2656       for (j=0; j<ncols; j++) *cols++ = *aj++;
2657     }
2658   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2659     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2660     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2661 
2662     /* initialization */
2663     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2664 
2665     /* jl: linked list for storing indices of the pivot rows
2666        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2667     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2668     for (i=0; i<am; i++) {
2669       jl[i] = am; il[i] = 0;
2670     }
2671 
2672     /* create and initialize a linked list for storing column indices of the active row k */
2673     nlnk = am + 1;
2674     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2675 
2676     /* initial FreeSpace size is fill*(ai[am]+1) */
2677     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2678     current_space     = free_space;
2679     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr);
2680     current_space_lvl = free_space_lvl;
2681 
2682     for (k=0; k<am; k++) {  /* for each active row k */
2683       /* initialize lnk by the column indices of row rip[k] of A */
2684       nzk   = 0;
2685       ncols = ai[rip[k]+1] - ai[rip[k]];
2686       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2687       ncols_upper = 0;
2688       for (j=0; j<ncols; j++) {
2689         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2690         if (riip[i] >= k) { /* only take upper triangular entry */
2691           ajtmp[ncols_upper] = i;
2692           ncols_upper++;
2693         }
2694       }
2695       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2696       nzk += nlnk;
2697 
2698       /* update lnk by computing fill-in for each pivot row to be merged in */
2699       prow = jl[k]; /* 1st pivot row */
2700 
2701       while (prow < k) {
2702         nextprow = jl[prow];
2703 
2704         /* merge prow into k-th row */
2705         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2706         jmax  = ui[prow+1];
2707         ncols = jmax-jmin;
2708         i     = jmin - ui[prow];
2709         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2710         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2711         j     = *(uj - 1);
2712         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2713         nzk  += nlnk;
2714 
2715         /* update il and jl for prow */
2716         if (jmin < jmax) {
2717           il[prow] = jmin;
2718           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2719         }
2720         prow = nextprow;
2721       }
2722 
2723       /* if free space is not available, make more free space */
2724       if (current_space->local_remaining<nzk) {
2725         i    = am - k + 1; /* num of unfactored rows */
2726         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2727         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2728         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2729         reallocs++;
2730       }
2731 
2732       /* copy data into free_space and free_space_lvl, then initialize lnk */
2733       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2734       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2735 
2736       /* add the k-th row into il and jl */
2737       if (nzk > 1) {
2738         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2739         jl[k] = jl[i]; jl[i] = k;
2740         il[k] = ui[k] + 1;
2741       }
2742       uj_ptr[k]     = current_space->array;
2743       uj_lvl_ptr[k] = current_space_lvl->array;
2744 
2745       current_space->array           += nzk;
2746       current_space->local_used      += nzk;
2747       current_space->local_remaining -= nzk;
2748 
2749       current_space_lvl->array           += nzk;
2750       current_space_lvl->local_used      += nzk;
2751       current_space_lvl->local_remaining -= nzk;
2752 
2753       ui[k+1] = ui[k] + nzk;
2754     }
2755 
2756 #if defined(PETSC_USE_INFO)
2757     if (ai[am] != 0) {
2758       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2759       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2760       ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2761       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2762     } else {
2763       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2764     }
2765 #endif
2766 
2767     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2768     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2769     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2770     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2771 
2772     /* destroy list of free space and other temporary array(s) */
2773     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2774     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2775     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2776     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2777 
2778   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2779 
2780   /* put together the new matrix in MATSEQSBAIJ format */
2781 
2782   b               = (Mat_SeqSBAIJ*)fact->data;
2783   b->singlemalloc = PETSC_FALSE;
2784 
2785   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2786 
2787   b->j         = uj;
2788   b->i         = ui;
2789   b->diag      = udiag;
2790   b->free_diag = PETSC_TRUE;
2791   b->ilen      = 0;
2792   b->imax      = 0;
2793   b->row       = perm;
2794   b->col       = perm;
2795 
2796   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2797   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2798 
2799   b->icol          = iperm;
2800   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2801   ierr             = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2802   ierr             = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2803   b->maxnz         = b->nz = ui[am];
2804   b->free_a        = PETSC_TRUE;
2805   b->free_ij       = PETSC_TRUE;
2806 
2807   fact->info.factor_mallocs   = reallocs;
2808   fact->info.fill_ratio_given = fill;
2809   if (ai[am] != 0) {
2810     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2811   } else {
2812     fact->info.fill_ratio_needed = 0.0;
2813   }
2814   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2815   PetscFunctionReturn(0);
2816 }
2817 
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2820 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2821 {
2822   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2823   Mat_SeqSBAIJ       *b;
2824   PetscErrorCode     ierr;
2825   PetscBool          perm_identity,missing;
2826   PetscReal          fill = info->fill;
2827   const PetscInt     *rip,*riip;
2828   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2829   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2830   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2831   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2832   PetscBT            lnkbt;
2833   IS                 iperm;
2834 
2835   PetscFunctionBegin;
2836   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2837   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2838   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2839 
2840   /* check whether perm is the identity mapping */
2841   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2842   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2843   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2844   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2845 
2846   /* initialization */
2847   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2848   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2849   ui[0] = 0;
2850 
2851   /* jl: linked list for storing indices of the pivot rows
2852      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2853   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
2854   for (i=0; i<am; i++) {
2855     jl[i] = am; il[i] = 0;
2856   }
2857 
2858   /* create and initialize a linked list for storing column indices of the active row k */
2859   nlnk = am + 1;
2860   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2861 
2862   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2863   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2864   current_space = free_space;
2865 
2866   for (k=0; k<am; k++) {  /* for each active row k */
2867     /* initialize lnk by the column indices of row rip[k] of A */
2868     nzk   = 0;
2869     ncols = ai[rip[k]+1] - ai[rip[k]];
2870     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2871     ncols_upper = 0;
2872     for (j=0; j<ncols; j++) {
2873       i = riip[*(aj + ai[rip[k]] + j)];
2874       if (i >= k) { /* only take upper triangular entry */
2875         cols[ncols_upper] = i;
2876         ncols_upper++;
2877       }
2878     }
2879     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2880     nzk += nlnk;
2881 
2882     /* update lnk by computing fill-in for each pivot row to be merged in */
2883     prow = jl[k]; /* 1st pivot row */
2884 
2885     while (prow < k) {
2886       nextprow = jl[prow];
2887       /* merge prow into k-th row */
2888       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2889       jmax   = ui[prow+1];
2890       ncols  = jmax-jmin;
2891       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2892       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2893       nzk   += nlnk;
2894 
2895       /* update il and jl for prow */
2896       if (jmin < jmax) {
2897         il[prow] = jmin;
2898         j        = *uj_ptr;
2899         jl[prow] = jl[j];
2900         jl[j]    = prow;
2901       }
2902       prow = nextprow;
2903     }
2904 
2905     /* if free space is not available, make more free space */
2906     if (current_space->local_remaining<nzk) {
2907       i    = am - k + 1; /* num of unfactored rows */
2908       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2909       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2910       reallocs++;
2911     }
2912 
2913     /* copy data into free space, then initialize lnk */
2914     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2915 
2916     /* add the k-th row into il and jl */
2917     if (nzk > 1) {
2918       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2919       jl[k] = jl[i]; jl[i] = k;
2920       il[k] = ui[k] + 1;
2921     }
2922     ui_ptr[k] = current_space->array;
2923 
2924     current_space->array           += nzk;
2925     current_space->local_used      += nzk;
2926     current_space->local_remaining -= nzk;
2927 
2928     ui[k+1] = ui[k] + nzk;
2929   }
2930 
2931   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2932   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2933   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2934 
2935   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2936   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2937   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2938   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2939 
2940   /* put together the new matrix in MATSEQSBAIJ format */
2941 
2942   b               = (Mat_SeqSBAIJ*)fact->data;
2943   b->singlemalloc = PETSC_FALSE;
2944   b->free_a       = PETSC_TRUE;
2945   b->free_ij      = PETSC_TRUE;
2946 
2947   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2948 
2949   b->j         = uj;
2950   b->i         = ui;
2951   b->diag      = udiag;
2952   b->free_diag = PETSC_TRUE;
2953   b->ilen      = 0;
2954   b->imax      = 0;
2955   b->row       = perm;
2956   b->col       = perm;
2957 
2958   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2959   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2960 
2961   b->icol          = iperm;
2962   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2963 
2964   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2965   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2966 
2967   b->maxnz = b->nz = ui[am];
2968 
2969   fact->info.factor_mallocs   = reallocs;
2970   fact->info.fill_ratio_given = fill;
2971   if (ai[am] != 0) {
2972     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2973     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2974   } else {
2975     fact->info.fill_ratio_needed = 0.0;
2976   }
2977 #if defined(PETSC_USE_INFO)
2978   if (ai[am] != 0) {
2979     PetscReal af = fact->info.fill_ratio_needed;
2980     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2981     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2982     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2983   } else {
2984     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2985   }
2986 #endif
2987   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 #undef __FUNCT__
2992 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2993 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2994 {
2995   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2996   Mat_SeqSBAIJ       *b;
2997   PetscErrorCode     ierr;
2998   PetscBool          perm_identity,missing;
2999   PetscReal          fill = info->fill;
3000   const PetscInt     *rip,*riip;
3001   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3002   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3003   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3004   PetscFreeSpaceList free_space=NULL,current_space=NULL;
3005   PetscBT            lnkbt;
3006   IS                 iperm;
3007 
3008   PetscFunctionBegin;
3009   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3010   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3011   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3012 
3013   /* check whether perm is the identity mapping */
3014   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
3015   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
3016   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
3017   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
3018 
3019   /* initialization */
3020   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
3021   ui[0] = 0;
3022 
3023   /* jl: linked list for storing indices of the pivot rows
3024      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3025   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
3026   for (i=0; i<am; i++) {
3027     jl[i] = am; il[i] = 0;
3028   }
3029 
3030   /* create and initialize a linked list for storing column indices of the active row k */
3031   nlnk = am + 1;
3032   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3033 
3034   /* initial FreeSpace size is fill*(ai[am]+1) */
3035   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
3036   current_space = free_space;
3037 
3038   for (k=0; k<am; k++) {  /* for each active row k */
3039     /* initialize lnk by the column indices of row rip[k] of A */
3040     nzk   = 0;
3041     ncols = ai[rip[k]+1] - ai[rip[k]];
3042     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3043     ncols_upper = 0;
3044     for (j=0; j<ncols; j++) {
3045       i = riip[*(aj + ai[rip[k]] + j)];
3046       if (i >= k) { /* only take upper triangular entry */
3047         cols[ncols_upper] = i;
3048         ncols_upper++;
3049       }
3050     }
3051     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3052     nzk += nlnk;
3053 
3054     /* update lnk by computing fill-in for each pivot row to be merged in */
3055     prow = jl[k]; /* 1st pivot row */
3056 
3057     while (prow < k) {
3058       nextprow = jl[prow];
3059       /* merge prow into k-th row */
3060       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3061       jmax   = ui[prow+1];
3062       ncols  = jmax-jmin;
3063       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3064       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3065       nzk   += nlnk;
3066 
3067       /* update il and jl for prow */
3068       if (jmin < jmax) {
3069         il[prow] = jmin;
3070         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3071       }
3072       prow = nextprow;
3073     }
3074 
3075     /* if free space is not available, make more free space */
3076     if (current_space->local_remaining<nzk) {
3077       i    = am - k + 1; /* num of unfactored rows */
3078       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3079       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3080       reallocs++;
3081     }
3082 
3083     /* copy data into free space, then initialize lnk */
3084     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3085 
3086     /* add the k-th row into il and jl */
3087     if (nzk-1 > 0) {
3088       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3089       jl[k] = jl[i]; jl[i] = k;
3090       il[k] = ui[k] + 1;
3091     }
3092     ui_ptr[k] = current_space->array;
3093 
3094     current_space->array           += nzk;
3095     current_space->local_used      += nzk;
3096     current_space->local_remaining -= nzk;
3097 
3098     ui[k+1] = ui[k] + nzk;
3099   }
3100 
3101 #if defined(PETSC_USE_INFO)
3102   if (ai[am] != 0) {
3103     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3104     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
3105     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
3106     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
3107   } else {
3108     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3109   }
3110 #endif
3111 
3112   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3113   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3114   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3115 
3116   /* destroy list of free space and other temporary array(s) */
3117   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
3118   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3119   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3120 
3121   /* put together the new matrix in MATSEQSBAIJ format */
3122 
3123   b               = (Mat_SeqSBAIJ*)fact->data;
3124   b->singlemalloc = PETSC_FALSE;
3125   b->free_a       = PETSC_TRUE;
3126   b->free_ij      = PETSC_TRUE;
3127 
3128   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
3129 
3130   b->j    = uj;
3131   b->i    = ui;
3132   b->diag = 0;
3133   b->ilen = 0;
3134   b->imax = 0;
3135   b->row  = perm;
3136   b->col  = perm;
3137 
3138   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3139   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3140 
3141   b->icol          = iperm;
3142   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3143 
3144   ierr     = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
3145   ierr     = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3146   b->maxnz = b->nz = ui[am];
3147 
3148   fact->info.factor_mallocs   = reallocs;
3149   fact->info.fill_ratio_given = fill;
3150   if (ai[am] != 0) {
3151     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3152   } else {
3153     fact->info.fill_ratio_needed = 0.0;
3154   }
3155   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 #undef __FUNCT__
3160 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3161 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3162 {
3163   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3164   PetscErrorCode    ierr;
3165   PetscInt          n   = A->rmap->n;
3166   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3167   PetscScalar       *x,sum;
3168   const PetscScalar *b;
3169   const MatScalar   *aa = a->a,*v;
3170   PetscInt          i,nz;
3171 
3172   PetscFunctionBegin;
3173   if (!n) PetscFunctionReturn(0);
3174 
3175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3176   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3177 
3178   /* forward solve the lower triangular */
3179   x[0] = b[0];
3180   v    = aa;
3181   vi   = aj;
3182   for (i=1; i<n; i++) {
3183     nz  = ai[i+1] - ai[i];
3184     sum = b[i];
3185     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3186     v   += nz;
3187     vi  += nz;
3188     x[i] = sum;
3189   }
3190 
3191   /* backward solve the upper triangular */
3192   for (i=n-1; i>=0; i--) {
3193     v   = aa + adiag[i+1] + 1;
3194     vi  = aj + adiag[i+1] + 1;
3195     nz  = adiag[i] - adiag[i+1]-1;
3196     sum = x[i];
3197     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3198     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3199   }
3200 
3201   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3202   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3203   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 #undef __FUNCT__
3208 #define __FUNCT__ "MatSolve_SeqAIJ"
3209 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3210 {
3211   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3212   IS                iscol = a->col,isrow = a->row;
3213   PetscErrorCode    ierr;
3214   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3215   const PetscInt    *rout,*cout,*r,*c;
3216   PetscScalar       *x,*tmp,sum;
3217   const PetscScalar *b;
3218   const MatScalar   *aa = a->a,*v;
3219 
3220   PetscFunctionBegin;
3221   if (!n) PetscFunctionReturn(0);
3222 
3223   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3224   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3225   tmp  = a->solve_work;
3226 
3227   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3228   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3229 
3230   /* forward solve the lower triangular */
3231   tmp[0] = b[r[0]];
3232   v      = aa;
3233   vi     = aj;
3234   for (i=1; i<n; i++) {
3235     nz  = ai[i+1] - ai[i];
3236     sum = b[r[i]];
3237     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3238     tmp[i] = sum;
3239     v     += nz; vi += nz;
3240   }
3241 
3242   /* backward solve the upper triangular */
3243   for (i=n-1; i>=0; i--) {
3244     v   = aa + adiag[i+1]+1;
3245     vi  = aj + adiag[i+1]+1;
3246     nz  = adiag[i]-adiag[i+1]-1;
3247     sum = tmp[i];
3248     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3249     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3250   }
3251 
3252   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3253   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3254   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3255   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3256   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 #undef __FUNCT__
3261 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3262 /*
3263     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3264 */
3265 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3266 {
3267   Mat            B = *fact;
3268   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3269   IS             isicol;
3270   PetscErrorCode ierr;
3271   const PetscInt *r,*ic;
3272   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3273   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3274   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3275   PetscInt       nlnk,*lnk;
3276   PetscBT        lnkbt;
3277   PetscBool      row_identity,icol_identity;
3278   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3279   const PetscInt *ics;
3280   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3281   PetscReal      dt     =info->dt,shift=info->shiftamount;
3282   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3283   PetscBool      missing;
3284 
3285   PetscFunctionBegin;
3286   if (dt      == PETSC_DEFAULT) dt = 0.005;
3287   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3288 
3289   /* ------- symbolic factorization, can be reused ---------*/
3290   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3291   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3292   adiag=a->diag;
3293 
3294   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3295 
3296   /* bdiag is location of diagonal in factor */
3297   ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3298   ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */
3299 
3300   /* allocate row pointers bi */
3301   ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr);
3302 
3303   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3304   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3305   nnz_max = ai[n]+2*n*dtcount+2;
3306 
3307   ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr);
3308   ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr);
3309 
3310   /* put together the new matrix */
3311   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3312   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
3313   b    = (Mat_SeqAIJ*)B->data;
3314 
3315   b->free_a       = PETSC_TRUE;
3316   b->free_ij      = PETSC_TRUE;
3317   b->singlemalloc = PETSC_FALSE;
3318 
3319   b->a    = ba;
3320   b->j    = bj;
3321   b->i    = bi;
3322   b->diag = bdiag;
3323   b->ilen = 0;
3324   b->imax = 0;
3325   b->row  = isrow;
3326   b->col  = iscol;
3327   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3328   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3329   b->icol = isicol;
3330 
3331   ierr     = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
3332   ierr     = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3333   b->maxnz = nnz_max;
3334 
3335   B->factortype            = MAT_FACTOR_ILUDT;
3336   B->info.factor_mallocs   = 0;
3337   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3338   /* ------- end of symbolic factorization ---------*/
3339 
3340   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3341   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3342   ics  = ic;
3343 
3344   /* linked list for storing column indices of the active row */
3345   nlnk = n + 1;
3346   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3347 
3348   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3349   ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr);
3350   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3351   ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr);
3352   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3353 
3354   bi[0]        = 0;
3355   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3356   bdiag_rev[n] = bdiag[0];
3357   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3358   for (i=0; i<n; i++) {
3359     /* copy initial fill into linked list */
3360     nzi = ai[r[i]+1] - ai[r[i]];
3361     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3362     nzi_al = adiag[r[i]] - ai[r[i]];
3363     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3364     ajtmp  = aj + ai[r[i]];
3365     ierr   = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3366 
3367     /* load in initial (unfactored row) */
3368     aatmp = a->a + ai[r[i]];
3369     for (j=0; j<nzi; j++) {
3370       rtmp[ics[*ajtmp++]] = *aatmp++;
3371     }
3372 
3373     /* add pivot rows into linked list */
3374     row = lnk[n];
3375     while (row < i) {
3376       nzi_bl = bi[row+1] - bi[row] + 1;
3377       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3378       ierr   = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3379       nzi   += nlnk;
3380       row    = lnk[row];
3381     }
3382 
3383     /* copy data from lnk into jtmp, then initialize lnk */
3384     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3385 
3386     /* numerical factorization */
3387     bjtmp = jtmp;
3388     row   = *bjtmp++; /* 1st pivot row */
3389     while (row < i) {
3390       pc         = rtmp + row;
3391       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3392       multiplier = (*pc) * (*pv);
3393       *pc        = multiplier;
3394       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3395         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3396         pv = ba + bdiag[row+1] + 1;
3397         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3398         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3399         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3400         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3401       }
3402       row = *bjtmp++;
3403     }
3404 
3405     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3406     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3407     nzi_bl   = 0; j = 0;
3408     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3409       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3410       nzi_bl++; j++;
3411     }
3412     nzi_bu = nzi - nzi_bl -1;
3413     while (j < nzi) {
3414       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3415       j++;
3416     }
3417 
3418     bjtmp = bj + bi[i];
3419     batmp = ba + bi[i];
3420     /* apply level dropping rule to L part */
3421     ncut = nzi_al + dtcount;
3422     if (ncut < nzi_bl) {
3423       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3424       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3425     } else {
3426       ncut = nzi_bl;
3427     }
3428     for (j=0; j<ncut; j++) {
3429       bjtmp[j] = jtmp[j];
3430       batmp[j] = vtmp[j];
3431       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3432     }
3433     bi[i+1] = bi[i] + ncut;
3434     nzi     = ncut + 1;
3435 
3436     /* apply level dropping rule to U part */
3437     ncut = nzi_au + dtcount;
3438     if (ncut < nzi_bu) {
3439       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3440       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3441     } else {
3442       ncut = nzi_bu;
3443     }
3444     nzi += ncut;
3445 
3446     /* mark bdiagonal */
3447     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3448     bdiag_rev[n-i-1] = bdiag[i+1];
3449     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3450     bjtmp            = bj + bdiag[i];
3451     batmp            = ba + bdiag[i];
3452     *bjtmp           = i;
3453     *batmp           = diag_tmp; /* rtmp[i]; */
3454     if (*batmp == 0.0) {
3455       *batmp = dt+shift;
3456       /* printf(" row %d add shift %g\n",i,shift); */
3457     }
3458     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3459     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3460 
3461     bjtmp = bj + bdiag[i+1]+1;
3462     batmp = ba + bdiag[i+1]+1;
3463     for (k=0; k<ncut; k++) {
3464       bjtmp[k] = jtmp[nzi_bl+1+k];
3465       batmp[k] = vtmp[nzi_bl+1+k];
3466       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3467     }
3468     /* printf("\n"); */
3469 
3470     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3471     /*
3472     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3473     printf(" ----------------------------\n");
3474     */
3475   } /* for (i=0; i<n; i++) */
3476     /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3477   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3478 
3479   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3480   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3481 
3482   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3483   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3484   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3485   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3486 
3487   ierr     = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3488   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3489 
3490   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3491   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3492   if (row_identity && icol_identity) {
3493     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3494   } else {
3495     B->ops->solve = MatSolve_SeqAIJ;
3496   }
3497 
3498   B->ops->solveadd          = 0;
3499   B->ops->solvetranspose    = 0;
3500   B->ops->solvetransposeadd = 0;
3501   B->ops->matsolve          = 0;
3502   B->assembled              = PETSC_TRUE;
3503   B->preallocated           = PETSC_TRUE;
3504   PetscFunctionReturn(0);
3505 }
3506 
3507 /* a wraper of MatILUDTFactor_SeqAIJ() */
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3510 /*
3511     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3512 */
3513 
3514 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3515 {
3516   PetscErrorCode ierr;
3517 
3518   PetscFunctionBegin;
3519   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3520   PetscFunctionReturn(0);
3521 }
3522 
3523 /*
3524    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3525    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3526 */
3527 #undef __FUNCT__
3528 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3529 /*
3530     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3531 */
3532 
3533 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3534 {
3535   Mat            C     =fact;
3536   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3537   IS             isrow = b->row,isicol = b->icol;
3538   PetscErrorCode ierr;
3539   const PetscInt *r,*ic,*ics;
3540   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3541   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3542   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3543   PetscReal      dt=info->dt,shift=info->shiftamount;
3544   PetscBool      row_identity, col_identity;
3545 
3546   PetscFunctionBegin;
3547   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3548   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3549   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
3550   ics  = ic;
3551 
3552   for (i=0; i<n; i++) {
3553     /* initialize rtmp array */
3554     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3555     bjtmp = bj + bi[i];
3556     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3557     rtmp[i] = 0.0;
3558     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3559     bjtmp   = bj + bdiag[i+1] + 1;
3560     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3561 
3562     /* load in initial unfactored row of A */
3563     /* printf("row %d\n",i); */
3564     nz    = ai[r[i]+1] - ai[r[i]];
3565     ajtmp = aj + ai[r[i]];
3566     v     = aa + ai[r[i]];
3567     for (j=0; j<nz; j++) {
3568       rtmp[ics[*ajtmp++]] = v[j];
3569       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3570     }
3571     /* printf("\n"); */
3572 
3573     /* numerical factorization */
3574     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3575     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3576     k     = 0;
3577     while (k < nzl) {
3578       row = *bjtmp++;
3579       /* printf("  prow %d\n",row); */
3580       pc         = rtmp + row;
3581       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3582       multiplier = (*pc) * (*pv);
3583       *pc        = multiplier;
3584       if (PetscAbsScalar(multiplier) > dt) {
3585         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3586         pv = b->a + bdiag[row+1] + 1;
3587         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3588         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3589         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3590       }
3591       k++;
3592     }
3593 
3594     /* finished row so stick it into b->a */
3595     /* L-part */
3596     pv  = b->a + bi[i];
3597     pj  = bj + bi[i];
3598     nzl = bi[i+1] - bi[i];
3599     for (j=0; j<nzl; j++) {
3600       pv[j] = rtmp[pj[j]];
3601       /* printf(" (%d,%g),",pj[j],pv[j]); */
3602     }
3603 
3604     /* diagonal: invert diagonal entries for simplier triangular solves */
3605     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3606     b->a[bdiag[i]] = 1.0/rtmp[i];
3607     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3608 
3609     /* U-part */
3610     pv  = b->a + bdiag[i+1] + 1;
3611     pj  = bj + bdiag[i+1] + 1;
3612     nzu = bdiag[i] - bdiag[i+1] - 1;
3613     for (j=0; j<nzu; j++) {
3614       pv[j] = rtmp[pj[j]];
3615       /* printf(" (%d,%g),",pj[j],pv[j]); */
3616     }
3617     /* printf("\n"); */
3618   }
3619 
3620   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3621   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3622   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3623 
3624   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3625   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3626   if (row_identity && col_identity) {
3627     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3628   } else {
3629     C->ops->solve = MatSolve_SeqAIJ;
3630   }
3631   C->ops->solveadd          = 0;
3632   C->ops->solvetranspose    = 0;
3633   C->ops->solvetransposeadd = 0;
3634   C->ops->matsolve          = 0;
3635   C->assembled              = PETSC_TRUE;
3636   C->preallocated           = PETSC_TRUE;
3637 
3638   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3639   PetscFunctionReturn(0);
3640 }
3641