xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 4fc747eaadbeca11629f314a99edccbc2ed7b3d3) !
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ"
9 /*
10       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
11 
12       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13 */
14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15 {
16   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
17   PetscErrorCode    ierr;
18   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19   const PetscInt    *ai = a->i, *aj = a->j;
20   const PetscScalar *aa = a->a;
21   PetscBool         *done;
22   PetscReal         best,past = 0,future;
23 
24   PetscFunctionBegin;
25   /* pick initial row */
26   best = -1;
27   for (i=0; i<n; i++) {
28     future = 0.0;
29     for (j=ai[i]; j<ai[i+1]; j++) {
30       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31       else              past  = PetscAbsScalar(aa[j]);
32     }
33     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34     if (past/future > best) {
35       best    = past/future;
36       current = i;
37     }
38   }
39 
40   ierr     = PetscMalloc1(n,&done);CHKERRQ(ierr);
41   ierr     = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr);
42   ierr     = PetscMalloc1(n,&order);CHKERRQ(ierr);
43   order[0] = current;
44   for (i=0; i<n-1; i++) {
45     done[current] = PETSC_TRUE;
46     best          = -1;
47     /* loop over all neighbors of current pivot */
48     for (j=ai[current]; j<ai[current+1]; j++) {
49       jj = aj[j];
50       if (done[jj]) continue;
51       /* loop over columns of potential next row computing weights for below and above diagonal */
52       past = future = 0.0;
53       for (k=ai[jj]; k<ai[jj+1]; k++) {
54         kk = aj[k];
55         if (done[kk]) past += PetscAbsScalar(aa[k]);
56         else if (kk != jj) future += PetscAbsScalar(aa[k]);
57       }
58       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59       if (past/future > best) {
60         best       = past/future;
61         newcurrent = jj;
62       }
63     }
64     if (best == -1) { /* no neighbors to select from so select best of all that remain */
65       best = -1;
66       for (k=0; k<n; k++) {
67         if (done[k]) continue;
68         future = 0.0;
69         past   = 0.0;
70         for (j=ai[k]; j<ai[k+1]; j++) {
71           kk = aj[j];
72           if (done[kk])       past += PetscAbsScalar(aa[j]);
73           else if (kk != k) future += PetscAbsScalar(aa[j]);
74         }
75         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76         if (past/future > best) {
77           best       = past/future;
78           newcurrent = k;
79         }
80       }
81     }
82     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83     current    = newcurrent;
84     order[i+1] = current;
85   }
86   ierr  = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
87   *icol = *irow;
88   ierr  = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
89   ierr  = PetscFree(done);CHKERRQ(ierr);
90   ierr  = PetscFree(order);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatGetFactor_seqaij_petsc"
96 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
97 {
98   PetscInt       n = A->rmap->n;
99   PetscErrorCode ierr;
100 
101   PetscFunctionBegin;
102 #if defined(PETSC_USE_COMPLEX)
103   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
104 #endif
105   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
106   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
107   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
108     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
109 
110     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
111     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
112 
113     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
114   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
115     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
116     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
117 
118     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
119     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
120   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
121   (*B)->factortype = ftype;
122 
123   ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr);
124   ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr);
125   PetscFunctionReturn(0);
126 }
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace"
130 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
131 {
132   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
133   IS                 isicol;
134   PetscErrorCode     ierr;
135   const PetscInt     *r,*ic;
136   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
137   PetscInt           *bi,*bj,*ajtmp;
138   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
139   PetscReal          f;
140   PetscInt           nlnk,*lnk,k,**bi_ptr;
141   PetscFreeSpaceList free_space=NULL,current_space=NULL;
142   PetscBT            lnkbt;
143   PetscBool          missing;
144 
145   PetscFunctionBegin;
146   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
147   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
148   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
149 
150   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
151   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
152   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
153 
154   /* get new row pointers */
155   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
156   bi[0] = 0;
157 
158   /* bdiag is location of diagonal in factor */
159   ierr     = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
160   bdiag[0] = 0;
161 
162   /* linked list for storing column indices of the active row */
163   nlnk = n + 1;
164   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
165 
166   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
167 
168   /* initial FreeSpace size is f*(ai[n]+1) */
169   f             = info->fill;
170   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
171   current_space = free_space;
172 
173   for (i=0; i<n; i++) {
174     /* copy previous fill into linked list */
175     nzi = 0;
176     nnz = ai[r[i]+1] - ai[r[i]];
177     ajtmp = aj + ai[r[i]];
178     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
179     nzi  += nlnk;
180 
181     /* add pivot rows into linked list */
182     row = lnk[n];
183     while (row < i) {
184       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
185       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
186       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
187       nzi  += nlnk;
188       row   = lnk[row];
189     }
190     bi[i+1] = bi[i] + nzi;
191     im[i]   = nzi;
192 
193     /* mark bdiag */
194     nzbd = 0;
195     nnz  = nzi;
196     k    = lnk[n];
197     while (nnz-- && k < i) {
198       nzbd++;
199       k = lnk[k];
200     }
201     bdiag[i] = bi[i] + nzbd;
202 
203     /* if free space is not available, make more free space */
204     if (current_space->local_remaining<nzi) {
205       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
206       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
207       reallocs++;
208     }
209 
210     /* copy data into free space, then initialize lnk */
211     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
212 
213     bi_ptr[i]                       = current_space->array;
214     current_space->array           += nzi;
215     current_space->local_used      += nzi;
216     current_space->local_remaining -= nzi;
217   }
218 #if defined(PETSC_USE_INFO)
219   if (ai[n] != 0) {
220     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
222     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
223     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
224     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
225   } else {
226     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
227   }
228 #endif
229 
230   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
231   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
232 
233   /* destroy list of free space and other temporary array(s) */
234   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
235   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
236   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
237   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
238 
239   /* put together the new matrix */
240   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
241   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
242   b    = (Mat_SeqAIJ*)(B)->data;
243 
244   b->free_a       = PETSC_TRUE;
245   b->free_ij      = PETSC_TRUE;
246   b->singlemalloc = PETSC_FALSE;
247 
248   ierr    = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr);
249   b->j    = bj;
250   b->i    = bi;
251   b->diag = bdiag;
252   b->ilen = 0;
253   b->imax = 0;
254   b->row  = isrow;
255   b->col  = iscol;
256   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
257   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
258   b->icol = isicol;
259   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
260 
261   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
262   ierr     = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
263   b->maxnz = b->nz = bi[n];
264 
265   (B)->factortype            = MAT_FACTOR_LU;
266   (B)->info.factor_mallocs   = reallocs;
267   (B)->info.fill_ratio_given = f;
268 
269   if (ai[n]) {
270     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
271   } else {
272     (B)->info.fill_ratio_needed = 0.0;
273   }
274   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
275   if (a->inode.size) {
276     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
277   }
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ"
283 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
284 {
285   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
286   IS                 isicol;
287   PetscErrorCode     ierr;
288   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
289   PetscInt           i,n=A->rmap->n;
290   PetscInt           *bi,*bj;
291   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
292   PetscReal          f;
293   PetscInt           nlnk,*lnk,k,**bi_ptr;
294   PetscFreeSpaceList free_space=NULL,current_space=NULL;
295   PetscBT            lnkbt;
296   PetscBool          missing;
297 
298   PetscFunctionBegin;
299   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
300   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
301   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
302 
303   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
304   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
305   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
306 
307   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
308   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
309   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
310   bi[0] = bdiag[0] = 0;
311 
312   /* linked list for storing column indices of the active row */
313   nlnk = n + 1;
314   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
315 
316   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
317 
318   /* initial FreeSpace size is f*(ai[n]+1) */
319   f             = info->fill;
320   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
321   current_space = free_space;
322 
323   for (i=0; i<n; i++) {
324     /* copy previous fill into linked list */
325     nzi = 0;
326     nnz = ai[r[i]+1] - ai[r[i]];
327     ajtmp = aj + ai[r[i]];
328     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
329     nzi  += nlnk;
330 
331     /* add pivot rows into linked list */
332     row = lnk[n];
333     while (row < i) {
334       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
335       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
336       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
337       nzi  += nlnk;
338       row   = lnk[row];
339     }
340     bi[i+1] = bi[i] + nzi;
341     im[i]   = nzi;
342 
343     /* mark bdiag */
344     nzbd = 0;
345     nnz  = nzi;
346     k    = lnk[n];
347     while (nnz-- && k < i) {
348       nzbd++;
349       k = lnk[k];
350     }
351     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
352 
353     /* if free space is not available, make more free space */
354     if (current_space->local_remaining<nzi) {
355       /* estimated additional space needed */
356       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
357       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
358       reallocs++;
359     }
360 
361     /* copy data into free space, then initialize lnk */
362     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
363 
364     bi_ptr[i]                       = current_space->array;
365     current_space->array           += nzi;
366     current_space->local_used      += nzi;
367     current_space->local_remaining -= nzi;
368   }
369 
370   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
371   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
372 
373   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
374   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
375   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
376   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
377   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
381   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
382   b    = (Mat_SeqAIJ*)(B)->data;
383 
384   b->free_a       = PETSC_TRUE;
385   b->free_ij      = PETSC_TRUE;
386   b->singlemalloc = PETSC_FALSE;
387 
388   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
389 
390   b->j    = bj;
391   b->i    = bi;
392   b->diag = bdiag;
393   b->ilen = 0;
394   b->imax = 0;
395   b->row  = isrow;
396   b->col  = iscol;
397   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
398   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
399   b->icol = isicol;
400   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
401 
402   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403   ierr     = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
404   b->maxnz = b->nz = bdiag[0]+1;
405 
406   B->factortype            = MAT_FACTOR_LU;
407   B->info.factor_mallocs   = reallocs;
408   B->info.fill_ratio_given = f;
409 
410   if (ai[n]) {
411     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
412   } else {
413     B->info.fill_ratio_needed = 0.0;
414   }
415 #if defined(PETSC_USE_INFO)
416   if (ai[n] != 0) {
417     PetscReal af = B->info.fill_ratio_needed;
418     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
419     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
420     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
421     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
422   } else {
423     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
424   }
425 #endif
426   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
427   if (a->inode.size) {
428     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
429   }
430   ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 /*
435     Trouble in factorization, should we dump the original matrix?
436 */
437 #undef __FUNCT__
438 #define __FUNCT__ "MatFactorDumpMatrix"
439 PetscErrorCode MatFactorDumpMatrix(Mat A)
440 {
441   PetscErrorCode ierr;
442   PetscBool      flg = PETSC_FALSE;
443 
444   PetscFunctionBegin;
445   ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr);
446   if (flg) {
447     PetscViewer viewer;
448     char        filename[PETSC_MAX_PATH_LEN];
449 
450     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
451     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
452     ierr = MatView(A,viewer);CHKERRQ(ierr);
453     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
454   }
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ"
460 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
461 {
462   Mat             C     =B;
463   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
464   IS              isrow = b->row,isicol = b->icol;
465   PetscErrorCode  ierr;
466   const PetscInt  *r,*ic,*ics;
467   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
468   PetscInt        i,j,k,nz,nzL,row,*pj;
469   const PetscInt  *ajtmp,*bjtmp;
470   MatScalar       *rtmp,*pc,multiplier,*pv;
471   const MatScalar *aa=a->a,*v;
472   PetscBool       row_identity,col_identity;
473   FactorShiftCtx  sctx;
474   const PetscInt  *ddiag;
475   PetscReal       rs;
476   MatScalar       d;
477 
478   PetscFunctionBegin;
479   /* MatPivotSetUp(): initialize shift context sctx */
480   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
481 
482   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
483     ddiag          = a->diag;
484     sctx.shift_top = info->zeropivot;
485     for (i=0; i<n; i++) {
486       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
487       d  = (aa)[ddiag[i]];
488       rs = -PetscAbsScalar(d) - PetscRealPart(d);
489       v  = aa+ai[i];
490       nz = ai[i+1] - ai[i];
491       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
492       if (rs>sctx.shift_top) sctx.shift_top = rs;
493     }
494     sctx.shift_top *= 1.1;
495     sctx.nshift_max = 5;
496     sctx.shift_lo   = 0.;
497     sctx.shift_hi   = 1.;
498   }
499 
500   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
501   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
502   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
503   ics  = ic;
504 
505   do {
506     sctx.newshift = PETSC_FALSE;
507     for (i=0; i<n; i++) {
508       /* zero rtmp */
509       /* L part */
510       nz    = bi[i+1] - bi[i];
511       bjtmp = bj + bi[i];
512       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
513 
514       /* U part */
515       nz    = bdiag[i]-bdiag[i+1];
516       bjtmp = bj + bdiag[i+1]+1;
517       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
518 
519       /* load in initial (unfactored row) */
520       nz    = ai[r[i]+1] - ai[r[i]];
521       ajtmp = aj + ai[r[i]];
522       v     = aa + ai[r[i]];
523       for (j=0; j<nz; j++) {
524         rtmp[ics[ajtmp[j]]] = v[j];
525       }
526       /* ZeropivotApply() */
527       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
528 
529       /* elimination */
530       bjtmp = bj + bi[i];
531       row   = *bjtmp++;
532       nzL   = bi[i+1] - bi[i];
533       for (k=0; k < nzL; k++) {
534         pc = rtmp + row;
535         if (*pc != 0.0) {
536           pv         = b->a + bdiag[row];
537           multiplier = *pc * (*pv);
538           *pc        = multiplier;
539 
540           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
541           pv = b->a + bdiag[row+1]+1;
542           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
543 
544           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
545           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
546         }
547         row = *bjtmp++;
548       }
549 
550       /* finished row so stick it into b->a */
551       rs = 0.0;
552       /* L part */
553       pv = b->a + bi[i];
554       pj = b->j + bi[i];
555       nz = bi[i+1] - bi[i];
556       for (j=0; j<nz; j++) {
557         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
558       }
559 
560       /* U part */
561       pv = b->a + bdiag[i+1]+1;
562       pj = b->j + bdiag[i+1]+1;
563       nz = bdiag[i] - bdiag[i+1]-1;
564       for (j=0; j<nz; j++) {
565         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
566       }
567 
568       sctx.rs = rs;
569       sctx.pv = rtmp[i];
570       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
571       if (sctx.newshift) break; /* break for-loop */
572       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
573 
574       /* Mark diagonal and invert diagonal for simplier triangular solves */
575       pv  = b->a + bdiag[i];
576       *pv = 1.0/rtmp[i];
577 
578     } /* endof for (i=0; i<n; i++) { */
579 
580     /* MatPivotRefine() */
581     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
582       /*
583        * if no shift in this attempt & shifting & started shifting & can refine,
584        * then try lower shift
585        */
586       sctx.shift_hi       = sctx.shift_fraction;
587       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
588       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
589       sctx.newshift       = PETSC_TRUE;
590       sctx.nshift++;
591     }
592   } while (sctx.newshift);
593 
594   ierr = PetscFree(rtmp);CHKERRQ(ierr);
595   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
596   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
597 
598   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
599   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
600   if (b->inode.size) {
601     C->ops->solve = MatSolve_SeqAIJ_Inode;
602   } else if (row_identity && col_identity) {
603     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
604   } else {
605     C->ops->solve = MatSolve_SeqAIJ;
606   }
607   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
608   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
609   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
610   C->ops->matsolve          = MatMatSolve_SeqAIJ;
611   C->assembled              = PETSC_TRUE;
612   C->preallocated           = PETSC_TRUE;
613 
614   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
615 
616   /* MatShiftView(A,info,&sctx) */
617   if (sctx.nshift) {
618     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
619       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
620     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
621       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
622     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
623       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
624     }
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace"
631 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
632 {
633   Mat             C     =B;
634   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
635   IS              isrow = b->row,isicol = b->icol;
636   PetscErrorCode  ierr;
637   const PetscInt  *r,*ic,*ics;
638   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
639   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
640   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
641   MatScalar       *pv,*rtmp,*pc,multiplier,d;
642   const MatScalar *v,*aa=a->a;
643   PetscReal       rs=0.0;
644   FactorShiftCtx  sctx;
645   const PetscInt  *ddiag;
646   PetscBool       row_identity, col_identity;
647 
648   PetscFunctionBegin;
649   /* MatPivotSetUp(): initialize shift context sctx */
650   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
651 
652   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
653     ddiag          = a->diag;
654     sctx.shift_top = info->zeropivot;
655     for (i=0; i<n; i++) {
656       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
657       d  = (aa)[ddiag[i]];
658       rs = -PetscAbsScalar(d) - PetscRealPart(d);
659       v  = aa+ai[i];
660       nz = ai[i+1] - ai[i];
661       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
662       if (rs>sctx.shift_top) sctx.shift_top = rs;
663     }
664     sctx.shift_top *= 1.1;
665     sctx.nshift_max = 5;
666     sctx.shift_lo   = 0.;
667     sctx.shift_hi   = 1.;
668   }
669 
670   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
671   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
672   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
673   ics  = ic;
674 
675   do {
676     sctx.newshift = PETSC_FALSE;
677     for (i=0; i<n; i++) {
678       nz    = bi[i+1] - bi[i];
679       bjtmp = bj + bi[i];
680       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
681 
682       /* load in initial (unfactored row) */
683       nz    = ai[r[i]+1] - ai[r[i]];
684       ajtmp = aj + ai[r[i]];
685       v     = aa + ai[r[i]];
686       for (j=0; j<nz; j++) {
687         rtmp[ics[ajtmp[j]]] = v[j];
688       }
689       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
690 
691       row = *bjtmp++;
692       while  (row < i) {
693         pc = rtmp + row;
694         if (*pc != 0.0) {
695           pv         = b->a + diag_offset[row];
696           pj         = b->j + diag_offset[row] + 1;
697           multiplier = *pc / *pv++;
698           *pc        = multiplier;
699           nz         = bi[row+1] - diag_offset[row] - 1;
700           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
702         }
703         row = *bjtmp++;
704       }
705       /* finished row so stick it into b->a */
706       pv   = b->a + bi[i];
707       pj   = b->j + bi[i];
708       nz   = bi[i+1] - bi[i];
709       diag = diag_offset[i] - bi[i];
710       rs   = 0.0;
711       for (j=0; j<nz; j++) {
712         pv[j] = rtmp[pj[j]];
713         rs   += PetscAbsScalar(pv[j]);
714       }
715       rs -= PetscAbsScalar(pv[diag]);
716 
717       sctx.rs = rs;
718       sctx.pv = pv[diag];
719       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
720       if (sctx.newshift) break;
721       pv[diag] = sctx.pv;
722     }
723 
724     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
725       /*
726        * if no shift in this attempt & shifting & started shifting & can refine,
727        * then try lower shift
728        */
729       sctx.shift_hi       = sctx.shift_fraction;
730       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
731       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
732       sctx.newshift       = PETSC_TRUE;
733       sctx.nshift++;
734     }
735   } while (sctx.newshift);
736 
737   /* invert diagonal entries for simplier triangular solves */
738   for (i=0; i<n; i++) {
739     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
740   }
741   ierr = PetscFree(rtmp);CHKERRQ(ierr);
742   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
743   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
744 
745   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
746   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
747   if (row_identity && col_identity) {
748     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749   } else {
750     C->ops->solve = MatSolve_SeqAIJ_inplace;
751   }
752   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
753   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
754   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
755   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
756 
757   C->assembled    = PETSC_TRUE;
758   C->preallocated = PETSC_TRUE;
759 
760   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
761   if (sctx.nshift) {
762     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
763       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
764     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
765       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
766     }
767   }
768   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
769   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
770 
771   ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 /*
776    This routine implements inplace ILU(0) with row or/and column permutations.
777    Input:
778      A - original matrix
779    Output;
780      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
781          a->j (col index) is permuted by the inverse of colperm, then sorted
782          a->a reordered accordingly with a->j
783          a->diag (ptr to diagonal elements) is updated.
784 */
785 #undef __FUNCT__
786 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm"
787 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
788 {
789   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
790   IS              isrow = a->row,isicol = a->icol;
791   PetscErrorCode  ierr;
792   const PetscInt  *r,*ic,*ics;
793   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
794   PetscInt        *ajtmp,nz,row;
795   PetscInt        *diag = a->diag,nbdiag,*pj;
796   PetscScalar     *rtmp,*pc,multiplier,d;
797   MatScalar       *pv,*v;
798   PetscReal       rs;
799   FactorShiftCtx  sctx;
800   const MatScalar *aa=a->a,*vtmp;
801 
802   PetscFunctionBegin;
803   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
804 
805   /* MatPivotSetUp(): initialize shift context sctx */
806   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
807 
808   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
809     const PetscInt *ddiag = a->diag;
810     sctx.shift_top = info->zeropivot;
811     for (i=0; i<n; i++) {
812       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
813       d    = (aa)[ddiag[i]];
814       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
815       vtmp = aa+ai[i];
816       nz   = ai[i+1] - ai[i];
817       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
818       if (rs>sctx.shift_top) sctx.shift_top = rs;
819     }
820     sctx.shift_top *= 1.1;
821     sctx.nshift_max = 5;
822     sctx.shift_lo   = 0.;
823     sctx.shift_hi   = 1.;
824   }
825 
826   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
827   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
828   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
829   ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr);
830   ics  = ic;
831 
832 #if defined(MV)
833   sctx.shift_top      = 0.;
834   sctx.nshift_max     = 0;
835   sctx.shift_lo       = 0.;
836   sctx.shift_hi       = 0.;
837   sctx.shift_fraction = 0.;
838 
839   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
840     sctx.shift_top = 0.;
841     for (i=0; i<n; i++) {
842       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
843       d  = (a->a)[diag[i]];
844       rs = -PetscAbsScalar(d) - PetscRealPart(d);
845       v  = a->a+ai[i];
846       nz = ai[i+1] - ai[i];
847       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
848       if (rs>sctx.shift_top) sctx.shift_top = rs;
849     }
850     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
851     sctx.shift_top *= 1.1;
852     sctx.nshift_max = 5;
853     sctx.shift_lo   = 0.;
854     sctx.shift_hi   = 1.;
855   }
856 
857   sctx.shift_amount = 0.;
858   sctx.nshift       = 0;
859 #endif
860 
861   do {
862     sctx.newshift = PETSC_FALSE;
863     for (i=0; i<n; i++) {
864       /* load in initial unfactored row */
865       nz    = ai[r[i]+1] - ai[r[i]];
866       ajtmp = aj + ai[r[i]];
867       v     = a->a + ai[r[i]];
868       /* sort permuted ajtmp and values v accordingly */
869       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
870       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
871 
872       diag[r[i]] = ai[r[i]];
873       for (j=0; j<nz; j++) {
874         rtmp[ajtmp[j]] = v[j];
875         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
876       }
877       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
878 
879       row = *ajtmp++;
880       while  (row < i) {
881         pc = rtmp + row;
882         if (*pc != 0.0) {
883           pv = a->a + diag[r[row]];
884           pj = aj + diag[r[row]] + 1;
885 
886           multiplier = *pc / *pv++;
887           *pc        = multiplier;
888           nz         = ai[r[row]+1] - diag[r[row]] - 1;
889           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
890           ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
891         }
892         row = *ajtmp++;
893       }
894       /* finished row so overwrite it onto a->a */
895       pv     = a->a + ai[r[i]];
896       pj     = aj + ai[r[i]];
897       nz     = ai[r[i]+1] - ai[r[i]];
898       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
899 
900       rs = 0.0;
901       for (j=0; j<nz; j++) {
902         pv[j] = rtmp[pj[j]];
903         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
904       }
905 
906       sctx.rs = rs;
907       sctx.pv = pv[nbdiag];
908       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
909       if (sctx.newshift) break;
910       pv[nbdiag] = sctx.pv;
911     }
912 
913     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
914       /*
915        * if no shift in this attempt & shifting & started shifting & can refine,
916        * then try lower shift
917        */
918       sctx.shift_hi       = sctx.shift_fraction;
919       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
920       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
921       sctx.newshift       = PETSC_TRUE;
922       sctx.nshift++;
923     }
924   } while (sctx.newshift);
925 
926   /* invert diagonal entries for simplier triangular solves */
927   for (i=0; i<n; i++) {
928     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
929   }
930 
931   ierr = PetscFree(rtmp);CHKERRQ(ierr);
932   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
933   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
934 
935   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
936   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
937   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
938   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
939 
940   A->assembled    = PETSC_TRUE;
941   A->preallocated = PETSC_TRUE;
942 
943   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
944   if (sctx.nshift) {
945     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
947     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
949     }
950   }
951   PetscFunctionReturn(0);
952 }
953 
954 /* ----------------------------------------------------------- */
955 #undef __FUNCT__
956 #define __FUNCT__ "MatLUFactor_SeqAIJ"
957 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
958 {
959   PetscErrorCode ierr;
960   Mat            C;
961 
962   PetscFunctionBegin;
963   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
964   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
965   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
966 
967   A->ops->solve          = C->ops->solve;
968   A->ops->solvetranspose = C->ops->solvetranspose;
969 
970   ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
971   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 /* ----------------------------------------------------------- */
975 
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatSolve_SeqAIJ_inplace"
979 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
980 {
981   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
982   IS                iscol = a->col,isrow = a->row;
983   PetscErrorCode    ierr;
984   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
985   PetscInt          nz;
986   const PetscInt    *rout,*cout,*r,*c;
987   PetscScalar       *x,*tmp,*tmps,sum;
988   const PetscScalar *b;
989   const MatScalar   *aa = a->a,*v;
990 
991   PetscFunctionBegin;
992   if (!n) PetscFunctionReturn(0);
993 
994   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
995   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
996   tmp  = a->solve_work;
997 
998   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
999   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1000 
1001   /* forward solve the lower triangular */
1002   tmp[0] = b[*r++];
1003   tmps   = tmp;
1004   for (i=1; i<n; i++) {
1005     v   = aa + ai[i];
1006     vi  = aj + ai[i];
1007     nz  = a->diag[i] - ai[i];
1008     sum = b[*r++];
1009     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010     tmp[i] = sum;
1011   }
1012 
1013   /* backward solve the upper triangular */
1014   for (i=n-1; i>=0; i--) {
1015     v   = aa + a->diag[i] + 1;
1016     vi  = aj + a->diag[i] + 1;
1017     nz  = ai[i+1] - a->diag[i] - 1;
1018     sum = tmp[i];
1019     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1020     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1021   }
1022 
1023   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1024   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1025   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1026   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1027   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace"
1033 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1034 {
1035   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1036   IS              iscol = a->col,isrow = a->row;
1037   PetscErrorCode  ierr;
1038   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1039   PetscInt        nz,neq;
1040   const PetscInt  *rout,*cout,*r,*c;
1041   PetscScalar     *x,*b,*tmp,*tmps,sum;
1042   const MatScalar *aa = a->a,*v;
1043   PetscBool       bisdense,xisdense;
1044 
1045   PetscFunctionBegin;
1046   if (!n) PetscFunctionReturn(0);
1047 
1048   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1049   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1050   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1051   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1052 
1053   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1054   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1055 
1056   tmp  = a->solve_work;
1057   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1058   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1059 
1060   for (neq=0; neq<B->cmap->n; neq++) {
1061     /* forward solve the lower triangular */
1062     tmp[0] = b[r[0]];
1063     tmps   = tmp;
1064     for (i=1; i<n; i++) {
1065       v   = aa + ai[i];
1066       vi  = aj + ai[i];
1067       nz  = a->diag[i] - ai[i];
1068       sum = b[r[i]];
1069       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1070       tmp[i] = sum;
1071     }
1072     /* backward solve the upper triangular */
1073     for (i=n-1; i>=0; i--) {
1074       v   = aa + a->diag[i] + 1;
1075       vi  = aj + a->diag[i] + 1;
1076       nz  = ai[i+1] - a->diag[i] - 1;
1077       sum = tmp[i];
1078       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1079       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1080     }
1081 
1082     b += n;
1083     x += n;
1084   }
1085   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1086   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1087   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1088   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1089   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMatSolve_SeqAIJ"
1095 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1096 {
1097   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1098   IS              iscol = a->col,isrow = a->row;
1099   PetscErrorCode  ierr;
1100   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1101   PetscInt        nz,neq;
1102   const PetscInt  *rout,*cout,*r,*c;
1103   PetscScalar     *x,*b,*tmp,sum;
1104   const MatScalar *aa = a->a,*v;
1105   PetscBool       bisdense,xisdense;
1106 
1107   PetscFunctionBegin;
1108   if (!n) PetscFunctionReturn(0);
1109 
1110   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr);
1111   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1112   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr);
1113   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1114 
1115   ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
1116   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1117 
1118   tmp  = a->solve_work;
1119   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1120   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1121 
1122   for (neq=0; neq<B->cmap->n; neq++) {
1123     /* forward solve the lower triangular */
1124     tmp[0] = b[r[0]];
1125     v      = aa;
1126     vi     = aj;
1127     for (i=1; i<n; i++) {
1128       nz  = ai[i+1] - ai[i];
1129       sum = b[r[i]];
1130       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1131       tmp[i] = sum;
1132       v     += nz; vi += nz;
1133     }
1134 
1135     /* backward solve the upper triangular */
1136     for (i=n-1; i>=0; i--) {
1137       v   = aa + adiag[i+1]+1;
1138       vi  = aj + adiag[i+1]+1;
1139       nz  = adiag[i]-adiag[i+1]-1;
1140       sum = tmp[i];
1141       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1142       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1143     }
1144 
1145     b += n;
1146     x += n;
1147   }
1148   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1149   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1150   ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
1151   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1152   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm"
1158 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1159 {
1160   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1161   IS              iscol = a->col,isrow = a->row;
1162   PetscErrorCode  ierr;
1163   const PetscInt  *r,*c,*rout,*cout;
1164   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1165   PetscInt        nz,row;
1166   PetscScalar     *x,*b,*tmp,*tmps,sum;
1167   const MatScalar *aa = a->a,*v;
1168 
1169   PetscFunctionBegin;
1170   if (!n) PetscFunctionReturn(0);
1171 
1172   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1174   tmp  = a->solve_work;
1175 
1176   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1177   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1178 
1179   /* forward solve the lower triangular */
1180   tmp[0] = b[*r++];
1181   tmps   = tmp;
1182   for (row=1; row<n; row++) {
1183     i   = rout[row]; /* permuted row */
1184     v   = aa + ai[i];
1185     vi  = aj + ai[i];
1186     nz  = a->diag[i] - ai[i];
1187     sum = b[*r++];
1188     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1189     tmp[row] = sum;
1190   }
1191 
1192   /* backward solve the upper triangular */
1193   for (row=n-1; row>=0; row--) {
1194     i   = rout[row]; /* permuted row */
1195     v   = aa + a->diag[i] + 1;
1196     vi  = aj + a->diag[i] + 1;
1197     nz  = ai[i+1] - a->diag[i] - 1;
1198     sum = tmp[row];
1199     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1200     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1201   }
1202 
1203   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1204   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1205   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1206   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1207   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 /* ----------------------------------------------------------- */
1212 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace"
1215 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1216 {
1217   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1218   PetscErrorCode    ierr;
1219   PetscInt          n   = A->rmap->n;
1220   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1221   PetscScalar       *x;
1222   const PetscScalar *b;
1223   const MatScalar   *aa = a->a;
1224 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1225   PetscInt        adiag_i,i,nz,ai_i;
1226   const PetscInt  *vi;
1227   const MatScalar *v;
1228   PetscScalar     sum;
1229 #endif
1230 
1231   PetscFunctionBegin;
1232   if (!n) PetscFunctionReturn(0);
1233 
1234   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1235   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1236 
1237 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1238   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1239 #else
1240   /* forward solve the lower triangular */
1241   x[0] = b[0];
1242   for (i=1; i<n; i++) {
1243     ai_i = ai[i];
1244     v    = aa + ai_i;
1245     vi   = aj + ai_i;
1246     nz   = adiag[i] - ai_i;
1247     sum  = b[i];
1248     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1249     x[i] = sum;
1250   }
1251 
1252   /* backward solve the upper triangular */
1253   for (i=n-1; i>=0; i--) {
1254     adiag_i = adiag[i];
1255     v       = aa + adiag_i + 1;
1256     vi      = aj + adiag_i + 1;
1257     nz      = ai[i+1] - adiag_i - 1;
1258     sum     = x[i];
1259     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1260     x[i] = sum*aa[adiag_i];
1261   }
1262 #endif
1263   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1264   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1265   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace"
1271 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1272 {
1273   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1274   IS                iscol = a->col,isrow = a->row;
1275   PetscErrorCode    ierr;
1276   PetscInt          i, n = A->rmap->n,j;
1277   PetscInt          nz;
1278   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1279   PetscScalar       *x,*tmp,sum;
1280   const PetscScalar *b;
1281   const MatScalar   *aa = a->a,*v;
1282 
1283   PetscFunctionBegin;
1284   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1285 
1286   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1287   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1288   tmp  = a->solve_work;
1289 
1290   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1291   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1292 
1293   /* forward solve the lower triangular */
1294   tmp[0] = b[*r++];
1295   for (i=1; i<n; i++) {
1296     v   = aa + ai[i];
1297     vi  = aj + ai[i];
1298     nz  = a->diag[i] - ai[i];
1299     sum = b[*r++];
1300     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301     tmp[i] = sum;
1302   }
1303 
1304   /* backward solve the upper triangular */
1305   for (i=n-1; i>=0; i--) {
1306     v   = aa + a->diag[i] + 1;
1307     vi  = aj + a->diag[i] + 1;
1308     nz  = ai[i+1] - a->diag[i] - 1;
1309     sum = tmp[i];
1310     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1311     tmp[i]   = sum*aa[a->diag[i]];
1312     x[*c--] += tmp[i];
1313   }
1314 
1315   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1316   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1317   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1318   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1319   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatSolveAdd_SeqAIJ"
1325 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1326 {
1327   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1328   IS                iscol = a->col,isrow = a->row;
1329   PetscErrorCode    ierr;
1330   PetscInt          i, n = A->rmap->n,j;
1331   PetscInt          nz;
1332   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1333   PetscScalar       *x,*tmp,sum;
1334   const PetscScalar *b;
1335   const MatScalar   *aa = a->a,*v;
1336 
1337   PetscFunctionBegin;
1338   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1339 
1340   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1341   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1342   tmp  = a->solve_work;
1343 
1344   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1345   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1346 
1347   /* forward solve the lower triangular */
1348   tmp[0] = b[r[0]];
1349   v      = aa;
1350   vi     = aj;
1351   for (i=1; i<n; i++) {
1352     nz  = ai[i+1] - ai[i];
1353     sum = b[r[i]];
1354     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1355     tmp[i] = sum;
1356     v     += nz;
1357     vi    += nz;
1358   }
1359 
1360   /* backward solve the upper triangular */
1361   v  = aa + adiag[n-1];
1362   vi = aj + adiag[n-1];
1363   for (i=n-1; i>=0; i--) {
1364     nz  = adiag[i] - adiag[i+1] - 1;
1365     sum = tmp[i];
1366     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1367     tmp[i]   = sum*v[nz];
1368     x[c[i]] += tmp[i];
1369     v       += nz+1; vi += nz+1;
1370   }
1371 
1372   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1373   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1374   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1375   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1376   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace"
1382 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1383 {
1384   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1385   IS                iscol = a->col,isrow = a->row;
1386   PetscErrorCode    ierr;
1387   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1388   PetscInt          i,n = A->rmap->n,j;
1389   PetscInt          nz;
1390   PetscScalar       *x,*tmp,s1;
1391   const MatScalar   *aa = a->a,*v;
1392   const PetscScalar *b;
1393 
1394   PetscFunctionBegin;
1395   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1396   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1397   tmp  = a->solve_work;
1398 
1399   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1400   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1401 
1402   /* copy the b into temp work space according to permutation */
1403   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1404 
1405   /* forward solve the U^T */
1406   for (i=0; i<n; i++) {
1407     v   = aa + diag[i];
1408     vi  = aj + diag[i] + 1;
1409     nz  = ai[i+1] - diag[i] - 1;
1410     s1  = tmp[i];
1411     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1412     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1413     tmp[i] = s1;
1414   }
1415 
1416   /* backward solve the L^T */
1417   for (i=n-1; i>=0; i--) {
1418     v  = aa + diag[i] - 1;
1419     vi = aj + diag[i] - 1;
1420     nz = diag[i] - ai[i];
1421     s1 = tmp[i];
1422     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1423   }
1424 
1425   /* copy tmp into x according to permutation */
1426   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1427 
1428   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1429   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1430   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1431   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1432 
1433   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatSolveTranspose_SeqAIJ"
1439 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1440 {
1441   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1442   IS                iscol = a->col,isrow = a->row;
1443   PetscErrorCode    ierr;
1444   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1445   PetscInt          i,n = A->rmap->n,j;
1446   PetscInt          nz;
1447   PetscScalar       *x,*tmp,s1;
1448   const MatScalar   *aa = a->a,*v;
1449   const PetscScalar *b;
1450 
1451   PetscFunctionBegin;
1452   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1453   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1454   tmp  = a->solve_work;
1455 
1456   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1457   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1458 
1459   /* copy the b into temp work space according to permutation */
1460   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1461 
1462   /* forward solve the U^T */
1463   for (i=0; i<n; i++) {
1464     v   = aa + adiag[i+1] + 1;
1465     vi  = aj + adiag[i+1] + 1;
1466     nz  = adiag[i] - adiag[i+1] - 1;
1467     s1  = tmp[i];
1468     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1469     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1470     tmp[i] = s1;
1471   }
1472 
1473   /* backward solve the L^T */
1474   for (i=n-1; i>=0; i--) {
1475     v  = aa + ai[i];
1476     vi = aj + ai[i];
1477     nz = ai[i+1] - ai[i];
1478     s1 = tmp[i];
1479     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1480   }
1481 
1482   /* copy tmp into x according to permutation */
1483   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1484 
1485   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1486   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1487   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1488   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1489 
1490   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace"
1496 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1497 {
1498   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1499   IS                iscol = a->col,isrow = a->row;
1500   PetscErrorCode    ierr;
1501   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1502   PetscInt          i,n = A->rmap->n,j;
1503   PetscInt          nz;
1504   PetscScalar       *x,*tmp,s1;
1505   const MatScalar   *aa = a->a,*v;
1506   const PetscScalar *b;
1507 
1508   PetscFunctionBegin;
1509   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1510   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1511   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1512   tmp  = a->solve_work;
1513 
1514   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1515   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1516 
1517   /* copy the b into temp work space according to permutation */
1518   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1519 
1520   /* forward solve the U^T */
1521   for (i=0; i<n; i++) {
1522     v   = aa + diag[i];
1523     vi  = aj + diag[i] + 1;
1524     nz  = ai[i+1] - diag[i] - 1;
1525     s1  = tmp[i];
1526     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1527     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1528     tmp[i] = s1;
1529   }
1530 
1531   /* backward solve the L^T */
1532   for (i=n-1; i>=0; i--) {
1533     v  = aa + diag[i] - 1;
1534     vi = aj + diag[i] - 1;
1535     nz = diag[i] - ai[i];
1536     s1 = tmp[i];
1537     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1538   }
1539 
1540   /* copy tmp into x according to permutation */
1541   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1542 
1543   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1544   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1545   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1547 
1548   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ"
1554 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1555 {
1556   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1557   IS                iscol = a->col,isrow = a->row;
1558   PetscErrorCode    ierr;
1559   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1560   PetscInt          i,n = A->rmap->n,j;
1561   PetscInt          nz;
1562   PetscScalar       *x,*tmp,s1;
1563   const MatScalar   *aa = a->a,*v;
1564   const PetscScalar *b;
1565 
1566   PetscFunctionBegin;
1567   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1568   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1569   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1570   tmp  = a->solve_work;
1571 
1572   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1573   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1574 
1575   /* copy the b into temp work space according to permutation */
1576   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1577 
1578   /* forward solve the U^T */
1579   for (i=0; i<n; i++) {
1580     v   = aa + adiag[i+1] + 1;
1581     vi  = aj + adiag[i+1] + 1;
1582     nz  = adiag[i] - adiag[i+1] - 1;
1583     s1  = tmp[i];
1584     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1585     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1586     tmp[i] = s1;
1587   }
1588 
1589 
1590   /* backward solve the L^T */
1591   for (i=n-1; i>=0; i--) {
1592     v  = aa + ai[i];
1593     vi = aj + ai[i];
1594     nz = ai[i+1] - ai[i];
1595     s1 = tmp[i];
1596     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1597   }
1598 
1599   /* copy tmp into x according to permutation */
1600   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1601 
1602   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1603   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1604   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1605   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1606 
1607   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 /* ----------------------------------------------------------------*/
1612 
1613 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1614 
1615 /*
1616    ilu() under revised new data structure.
1617    Factored arrays bj and ba are stored as
1618      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1619 
1620    bi=fact->i is an array of size n+1, in which
1621    bi+
1622      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1623      bi[n]:  points to L(n-1,n-1)+1
1624 
1625   bdiag=fact->diag is an array of size n+1,in which
1626      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1627      bdiag[n]: points to entry of U(n-1,0)-1
1628 
1629    U(i,:) contains bdiag[i] as its last entry, i.e.,
1630     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1631 */
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0"
1634 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1635 {
1636   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1637   PetscErrorCode ierr;
1638   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1639   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1640   IS             isicol;
1641 
1642   PetscFunctionBegin;
1643   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1644   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1645   b    = (Mat_SeqAIJ*)(fact)->data;
1646 
1647   /* allocate matrix arrays for new data structure */
1648   ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr);
1649   ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1650 
1651   b->singlemalloc = PETSC_TRUE;
1652   if (!b->diag) {
1653     ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr);
1654     ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1655   }
1656   bdiag = b->diag;
1657 
1658   if (n > 0) {
1659     ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr);
1660   }
1661 
1662   /* set bi and bj with new data structure */
1663   bi = b->i;
1664   bj = b->j;
1665 
1666   /* L part */
1667   bi[0] = 0;
1668   for (i=0; i<n; i++) {
1669     nz      = adiag[i] - ai[i];
1670     bi[i+1] = bi[i] + nz;
1671     aj      = a->j + ai[i];
1672     for (j=0; j<nz; j++) {
1673       /*   *bj = aj[j]; bj++; */
1674       bj[k++] = aj[j];
1675     }
1676   }
1677 
1678   /* U part */
1679   bdiag[n] = bi[n]-1;
1680   for (i=n-1; i>=0; i--) {
1681     nz = ai[i+1] - adiag[i] - 1;
1682     aj = a->j + adiag[i] + 1;
1683     for (j=0; j<nz; j++) {
1684       /*      *bj = aj[j]; bj++; */
1685       bj[k++] = aj[j];
1686     }
1687     /* diag[i] */
1688     /*    *bj = i; bj++; */
1689     bj[k++]  = i;
1690     bdiag[i] = bdiag[i+1] + nz + 1;
1691   }
1692 
1693   fact->factortype             = MAT_FACTOR_ILU;
1694   fact->info.factor_mallocs    = 0;
1695   fact->info.fill_ratio_given  = info->fill;
1696   fact->info.fill_ratio_needed = 1.0;
1697   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1698   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1699 
1700   b       = (Mat_SeqAIJ*)(fact)->data;
1701   b->row  = isrow;
1702   b->col  = iscol;
1703   b->icol = isicol;
1704   ierr    = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1705   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1706   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ"
1712 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1713 {
1714   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1715   IS                 isicol;
1716   PetscErrorCode     ierr;
1717   const PetscInt     *r,*ic;
1718   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1719   PetscInt           *bi,*cols,nnz,*cols_lvl;
1720   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1721   PetscInt           i,levels,diagonal_fill;
1722   PetscBool          col_identity,row_identity,missing;
1723   PetscReal          f;
1724   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1725   PetscBT            lnkbt;
1726   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1727   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1728   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1729 
1730   PetscFunctionBegin;
1731   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1732   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1733   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1734 
1735   levels = (PetscInt)info->levels;
1736   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1737   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1738   if (!levels && row_identity && col_identity) {
1739     /* special case: ilu(0) with natural ordering */
1740     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1741     if (a->inode.size) {
1742       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1743     }
1744     PetscFunctionReturn(0);
1745   }
1746 
1747   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1748   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1749   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1750 
1751   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1752   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1753   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1754   bi[0] = bdiag[0] = 0;
1755   ierr  = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1756 
1757   /* create a linked list for storing column indices of the active row */
1758   nlnk = n + 1;
1759   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1760 
1761   /* initial FreeSpace size is f*(ai[n]+1) */
1762   f                 = info->fill;
1763   diagonal_fill     = (PetscInt)info->diagonal_fill;
1764   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1765   current_space     = free_space;
1766   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1767   current_space_lvl = free_space_lvl;
1768   for (i=0; i<n; i++) {
1769     nzi = 0;
1770     /* copy current row into linked list */
1771     nnz = ai[r[i]+1] - ai[r[i]];
1772     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1773     cols   = aj + ai[r[i]];
1774     lnk[i] = -1; /* marker to indicate if diagonal exists */
1775     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1776     nzi   += nlnk;
1777 
1778     /* make sure diagonal entry is included */
1779     if (diagonal_fill && lnk[i] == -1) {
1780       fm = n;
1781       while (lnk[fm] < i) fm = lnk[fm];
1782       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1783       lnk[fm]    = i;
1784       lnk_lvl[i] = 0;
1785       nzi++; dcount++;
1786     }
1787 
1788     /* add pivot rows into the active row */
1789     nzbd = 0;
1790     prow = lnk[n];
1791     while (prow < i) {
1792       nnz      = bdiag[prow];
1793       cols     = bj_ptr[prow] + nnz + 1;
1794       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1795       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1796       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1797       nzi     += nlnk;
1798       prow     = lnk[prow];
1799       nzbd++;
1800     }
1801     bdiag[i] = nzbd;
1802     bi[i+1]  = bi[i] + nzi;
1803     /* if free space is not available, make more free space */
1804     if (current_space->local_remaining<nzi) {
1805       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1806       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1807       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1808       reallocs++;
1809     }
1810 
1811     /* copy data into free_space and free_space_lvl, then initialize lnk */
1812     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1813     bj_ptr[i]    = current_space->array;
1814     bjlvl_ptr[i] = current_space_lvl->array;
1815 
1816     /* make sure the active row i has diagonal entry */
1817     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1818 
1819     current_space->array               += nzi;
1820     current_space->local_used          += nzi;
1821     current_space->local_remaining     -= nzi;
1822     current_space_lvl->array           += nzi;
1823     current_space_lvl->local_used      += nzi;
1824     current_space_lvl->local_remaining -= nzi;
1825   }
1826 
1827   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1828   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1829   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1830   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
1831   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1832 
1833   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1834   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1835   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1836 
1837 #if defined(PETSC_USE_INFO)
1838   {
1839     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1840     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
1841     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
1842     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
1843     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1844     if (diagonal_fill) {
1845       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
1846     }
1847   }
1848 #endif
1849   /* put together the new matrix */
1850   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
1851   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
1852   b    = (Mat_SeqAIJ*)(fact)->data;
1853 
1854   b->free_a       = PETSC_TRUE;
1855   b->free_ij      = PETSC_TRUE;
1856   b->singlemalloc = PETSC_FALSE;
1857 
1858   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
1859 
1860   b->j    = bj;
1861   b->i    = bi;
1862   b->diag = bdiag;
1863   b->ilen = 0;
1864   b->imax = 0;
1865   b->row  = isrow;
1866   b->col  = iscol;
1867   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1868   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1869   b->icol = isicol;
1870 
1871   ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
1872   /* In b structure:  Free imax, ilen, old a, old j.
1873      Allocate bdiag, solve_work, new a, new j */
1874   ierr     = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1875   b->maxnz = b->nz = bdiag[0]+1;
1876 
1877   (fact)->info.factor_mallocs    = reallocs;
1878   (fact)->info.fill_ratio_given  = f;
1879   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1880   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1881   if (a->inode.size) {
1882     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1883   }
1884   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace"
1890 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1891 {
1892   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1893   IS                 isicol;
1894   PetscErrorCode     ierr;
1895   const PetscInt     *r,*ic;
1896   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1897   PetscInt           *bi,*cols,nnz,*cols_lvl;
1898   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1899   PetscInt           i,levels,diagonal_fill;
1900   PetscBool          col_identity,row_identity;
1901   PetscReal          f;
1902   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1903   PetscBT            lnkbt;
1904   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1905   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1906   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1907   PetscBool          missing;
1908 
1909   PetscFunctionBegin;
1910   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1911   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1912   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1913 
1914   f             = info->fill;
1915   levels        = (PetscInt)info->levels;
1916   diagonal_fill = (PetscInt)info->diagonal_fill;
1917 
1918   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1919 
1920   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1921   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1922   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1923     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1924 
1925     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1926     if (a->inode.size) {
1927       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1928     }
1929     fact->factortype               = MAT_FACTOR_ILU;
1930     (fact)->info.factor_mallocs    = 0;
1931     (fact)->info.fill_ratio_given  = info->fill;
1932     (fact)->info.fill_ratio_needed = 1.0;
1933 
1934     b    = (Mat_SeqAIJ*)(fact)->data;
1935     b->row  = isrow;
1936     b->col  = iscol;
1937     b->icol = isicol;
1938     ierr    = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1939     ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1940     ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1941     PetscFunctionReturn(0);
1942   }
1943 
1944   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1945   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1946 
1947   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1948   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1949   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1950   bi[0] = bdiag[0] = 0;
1951 
1952   ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1953 
1954   /* create a linked list for storing column indices of the active row */
1955   nlnk = n + 1;
1956   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1957 
1958   /* initial FreeSpace size is f*(ai[n]+1) */
1959   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1960   current_space     = free_space;
1961   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1962   current_space_lvl = free_space_lvl;
1963 
1964   for (i=0; i<n; i++) {
1965     nzi = 0;
1966     /* copy current row into linked list */
1967     nnz = ai[r[i]+1] - ai[r[i]];
1968     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1969     cols   = aj + ai[r[i]];
1970     lnk[i] = -1; /* marker to indicate if diagonal exists */
1971     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1972     nzi   += nlnk;
1973 
1974     /* make sure diagonal entry is included */
1975     if (diagonal_fill && lnk[i] == -1) {
1976       fm = n;
1977       while (lnk[fm] < i) fm = lnk[fm];
1978       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1979       lnk[fm]    = i;
1980       lnk_lvl[i] = 0;
1981       nzi++; dcount++;
1982     }
1983 
1984     /* add pivot rows into the active row */
1985     nzbd = 0;
1986     prow = lnk[n];
1987     while (prow < i) {
1988       nnz      = bdiag[prow];
1989       cols     = bj_ptr[prow] + nnz + 1;
1990       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1991       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1992       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1993       nzi     += nlnk;
1994       prow     = lnk[prow];
1995       nzbd++;
1996     }
1997     bdiag[i] = nzbd;
1998     bi[i+1]  = bi[i] + nzi;
1999 
2000     /* if free space is not available, make more free space */
2001     if (current_space->local_remaining<nzi) {
2002       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
2003       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
2004       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
2005       reallocs++;
2006     }
2007 
2008     /* copy data into free_space and free_space_lvl, then initialize lnk */
2009     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2010     bj_ptr[i]    = current_space->array;
2011     bjlvl_ptr[i] = current_space_lvl->array;
2012 
2013     /* make sure the active row i has diagonal entry */
2014     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2015 
2016     current_space->array               += nzi;
2017     current_space->local_used          += nzi;
2018     current_space->local_remaining     -= nzi;
2019     current_space_lvl->array           += nzi;
2020     current_space_lvl->local_used      += nzi;
2021     current_space_lvl->local_remaining -= nzi;
2022   }
2023 
2024   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2025   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2026 
2027   /* destroy list of free space and other temporary arrays */
2028   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
2029   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
2030   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2031   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2032   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
2033 
2034 #if defined(PETSC_USE_INFO)
2035   {
2036     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2037     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
2038     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2039     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
2040     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2041     if (diagonal_fill) {
2042       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
2043     }
2044   }
2045 #endif
2046 
2047   /* put together the new matrix */
2048   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2049   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
2050   b    = (Mat_SeqAIJ*)(fact)->data;
2051 
2052   b->free_a       = PETSC_TRUE;
2053   b->free_ij      = PETSC_TRUE;
2054   b->singlemalloc = PETSC_FALSE;
2055 
2056   ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr);
2057   b->j = bj;
2058   b->i = bi;
2059   for (i=0; i<n; i++) bdiag[i] += bi[i];
2060   b->diag = bdiag;
2061   b->ilen = 0;
2062   b->imax = 0;
2063   b->row  = isrow;
2064   b->col  = iscol;
2065   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2066   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2067   b->icol = isicol;
2068   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
2069   /* In b structure:  Free imax, ilen, old a, old j.
2070      Allocate bdiag, solve_work, new a, new j */
2071   ierr     = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2072   b->maxnz = b->nz = bi[n];
2073 
2074   (fact)->info.factor_mallocs    = reallocs;
2075   (fact)->info.fill_ratio_given  = f;
2076   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2077   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2078   if (a->inode.size) {
2079     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2080   }
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ"
2086 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2087 {
2088   Mat            C = B;
2089   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2090   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2091   IS             ip=b->row,iip = b->icol;
2092   PetscErrorCode ierr;
2093   const PetscInt *rip,*riip;
2094   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2095   PetscInt       *ai=a->i,*aj=a->j;
2096   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2097   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2098   PetscBool      perm_identity;
2099   FactorShiftCtx sctx;
2100   PetscReal      rs;
2101   MatScalar      d,*v;
2102 
2103   PetscFunctionBegin;
2104   /* MatPivotSetUp(): initialize shift context sctx */
2105   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2106 
2107   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2108     sctx.shift_top = info->zeropivot;
2109     for (i=0; i<mbs; i++) {
2110       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2111       d  = (aa)[a->diag[i]];
2112       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2113       v  = aa+ai[i];
2114       nz = ai[i+1] - ai[i];
2115       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2116       if (rs>sctx.shift_top) sctx.shift_top = rs;
2117     }
2118     sctx.shift_top *= 1.1;
2119     sctx.nshift_max = 5;
2120     sctx.shift_lo   = 0.;
2121     sctx.shift_hi   = 1.;
2122   }
2123 
2124   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2125   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2126 
2127   /* allocate working arrays
2128      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2129      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2130   */
2131   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr);
2132 
2133   do {
2134     sctx.newshift = PETSC_FALSE;
2135 
2136     for (i=0; i<mbs; i++) c2r[i] = mbs;
2137     if (mbs) il[0] = 0;
2138 
2139     for (k = 0; k<mbs; k++) {
2140       /* zero rtmp */
2141       nz    = bi[k+1] - bi[k];
2142       bjtmp = bj + bi[k];
2143       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2144 
2145       /* load in initial unfactored row */
2146       bval = ba + bi[k];
2147       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2148       for (j = jmin; j < jmax; j++) {
2149         col = riip[aj[j]];
2150         if (col >= k) { /* only take upper triangular entry */
2151           rtmp[col] = aa[j];
2152           *bval++   = 0.0; /* for in-place factorization */
2153         }
2154       }
2155       /* shift the diagonal of the matrix: ZeropivotApply() */
2156       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2157 
2158       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2159       dk = rtmp[k];
2160       i  = c2r[k]; /* first row to be added to k_th row  */
2161 
2162       while (i < k) {
2163         nexti = c2r[i]; /* next row to be added to k_th row */
2164 
2165         /* compute multiplier, update diag(k) and U(i,k) */
2166         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2167         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2168         dk     += uikdi*ba[ili]; /* update diag[k] */
2169         ba[ili] = uikdi; /* -U(i,k) */
2170 
2171         /* add multiple of row i to k-th row */
2172         jmin = ili + 1; jmax = bi[i+1];
2173         if (jmin < jmax) {
2174           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2175           /* update il and c2r for row i */
2176           il[i] = jmin;
2177           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2178         }
2179         i = nexti;
2180       }
2181 
2182       /* copy data into U(k,:) */
2183       rs   = 0.0;
2184       jmin = bi[k]; jmax = bi[k+1]-1;
2185       if (jmin < jmax) {
2186         for (j=jmin; j<jmax; j++) {
2187           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2188         }
2189         /* add the k-th row into il and c2r */
2190         il[k] = jmin;
2191         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2192       }
2193 
2194       /* MatPivotCheck() */
2195       sctx.rs = rs;
2196       sctx.pv = dk;
2197       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
2198       if (sctx.newshift) break;
2199       dk = sctx.pv;
2200 
2201       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2202     }
2203   } while (sctx.newshift);
2204 
2205   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2206   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2207   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2208 
2209   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2210   if (perm_identity) {
2211     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2212     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2213     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2214     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2215   } else {
2216     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2217     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2218     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2219     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2220   }
2221 
2222   C->assembled    = PETSC_TRUE;
2223   C->preallocated = PETSC_TRUE;
2224 
2225   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2226 
2227   /* MatPivotView() */
2228   if (sctx.nshift) {
2229     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2230       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
2231     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2232       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2233     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2234       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
2235     }
2236   }
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace"
2242 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2243 {
2244   Mat            C = B;
2245   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2246   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2247   IS             ip=b->row,iip = b->icol;
2248   PetscErrorCode ierr;
2249   const PetscInt *rip,*riip;
2250   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2251   PetscInt       *ai=a->i,*aj=a->j;
2252   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2253   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2254   PetscBool      perm_identity;
2255   FactorShiftCtx sctx;
2256   PetscReal      rs;
2257   MatScalar      d,*v;
2258 
2259   PetscFunctionBegin;
2260   /* MatPivotSetUp(): initialize shift context sctx */
2261   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2262 
2263   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2264     sctx.shift_top = info->zeropivot;
2265     for (i=0; i<mbs; i++) {
2266       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2267       d  = (aa)[a->diag[i]];
2268       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2269       v  = aa+ai[i];
2270       nz = ai[i+1] - ai[i];
2271       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2272       if (rs>sctx.shift_top) sctx.shift_top = rs;
2273     }
2274     sctx.shift_top *= 1.1;
2275     sctx.nshift_max = 5;
2276     sctx.shift_lo   = 0.;
2277     sctx.shift_hi   = 1.;
2278   }
2279 
2280   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2281   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2282 
2283   /* initialization */
2284   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr);
2285 
2286   do {
2287     sctx.newshift = PETSC_FALSE;
2288 
2289     for (i=0; i<mbs; i++) jl[i] = mbs;
2290     il[0] = 0;
2291 
2292     for (k = 0; k<mbs; k++) {
2293       /* zero rtmp */
2294       nz    = bi[k+1] - bi[k];
2295       bjtmp = bj + bi[k];
2296       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2297 
2298       bval = ba + bi[k];
2299       /* initialize k-th row by the perm[k]-th row of A */
2300       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2301       for (j = jmin; j < jmax; j++) {
2302         col = riip[aj[j]];
2303         if (col >= k) { /* only take upper triangular entry */
2304           rtmp[col] = aa[j];
2305           *bval++   = 0.0; /* for in-place factorization */
2306         }
2307       }
2308       /* shift the diagonal of the matrix */
2309       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2310 
2311       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2312       dk = rtmp[k];
2313       i  = jl[k]; /* first row to be added to k_th row  */
2314 
2315       while (i < k) {
2316         nexti = jl[i]; /* next row to be added to k_th row */
2317 
2318         /* compute multiplier, update diag(k) and U(i,k) */
2319         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2320         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2321         dk     += uikdi*ba[ili];
2322         ba[ili] = uikdi; /* -U(i,k) */
2323 
2324         /* add multiple of row i to k-th row */
2325         jmin = ili + 1; jmax = bi[i+1];
2326         if (jmin < jmax) {
2327           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2328           /* update il and jl for row i */
2329           il[i] = jmin;
2330           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2331         }
2332         i = nexti;
2333       }
2334 
2335       /* shift the diagonals when zero pivot is detected */
2336       /* compute rs=sum of abs(off-diagonal) */
2337       rs   = 0.0;
2338       jmin = bi[k]+1;
2339       nz   = bi[k+1] - jmin;
2340       bcol = bj + jmin;
2341       for (j=0; j<nz; j++) {
2342         rs += PetscAbsScalar(rtmp[bcol[j]]);
2343       }
2344 
2345       sctx.rs = rs;
2346       sctx.pv = dk;
2347       ierr    = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr);
2348       if (sctx.newshift) break;
2349       dk = sctx.pv;
2350 
2351       /* copy data into U(k,:) */
2352       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2353       jmin      = bi[k]+1; jmax = bi[k+1];
2354       if (jmin < jmax) {
2355         for (j=jmin; j<jmax; j++) {
2356           col = bj[j]; ba[j] = rtmp[col];
2357         }
2358         /* add the k-th row into il and jl */
2359         il[k] = jmin;
2360         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2361       }
2362     }
2363   } while (sctx.newshift);
2364 
2365   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2366   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2367   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2368 
2369   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2370   if (perm_identity) {
2371     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2372     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2373     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2374     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2375   } else {
2376     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2377     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2378     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2379     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2380   }
2381 
2382   C->assembled    = PETSC_TRUE;
2383   C->preallocated = PETSC_TRUE;
2384 
2385   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2386   if (sctx.nshift) {
2387     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2388       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2389     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2390       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2391     }
2392   }
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 /*
2397    icc() under revised new data structure.
2398    Factored arrays bj and ba are stored as
2399      U(0,:),...,U(i,:),U(n-1,:)
2400 
2401    ui=fact->i is an array of size n+1, in which
2402    ui+
2403      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2404      ui[n]:  points to U(n-1,n-1)+1
2405 
2406   udiag=fact->diag is an array of size n,in which
2407      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2408 
2409    U(i,:) contains udiag[i] as its last entry, i.e.,
2410     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2411 */
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ"
2415 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2416 {
2417   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2418   Mat_SeqSBAIJ       *b;
2419   PetscErrorCode     ierr;
2420   PetscBool          perm_identity,missing;
2421   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2422   const PetscInt     *rip,*riip;
2423   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2424   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2425   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2426   PetscReal          fill          =info->fill,levels=info->levels;
2427   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2428   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2429   PetscBT            lnkbt;
2430   IS                 iperm;
2431 
2432   PetscFunctionBegin;
2433   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2434   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2435   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2436   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2437   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2438 
2439   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2440   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2441   ui[0] = 0;
2442 
2443   /* ICC(0) without matrix ordering: simply rearrange column indices */
2444   if (!levels && perm_identity) {
2445     for (i=0; i<am; i++) {
2446       ncols    = ai[i+1] - a->diag[i];
2447       ui[i+1]  = ui[i] + ncols;
2448       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2449     }
2450     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2451     cols = uj;
2452     for (i=0; i<am; i++) {
2453       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2454       ncols = ai[i+1] - a->diag[i] -1;
2455       for (j=0; j<ncols; j++) *cols++ = aj[j];
2456       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2457     }
2458   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2459     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2460     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2461 
2462     /* initialization */
2463     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2464 
2465     /* jl: linked list for storing indices of the pivot rows
2466        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2467     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2468     for (i=0; i<am; i++) {
2469       jl[i] = am; il[i] = 0;
2470     }
2471 
2472     /* create and initialize a linked list for storing column indices of the active row k */
2473     nlnk = am + 1;
2474     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2475 
2476     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2477     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2478     current_space     = free_space;
2479     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2480     current_space_lvl = free_space_lvl;
2481 
2482     for (k=0; k<am; k++) {  /* for each active row k */
2483       /* initialize lnk by the column indices of row rip[k] of A */
2484       nzk   = 0;
2485       ncols = ai[rip[k]+1] - ai[rip[k]];
2486       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2487       ncols_upper = 0;
2488       for (j=0; j<ncols; j++) {
2489         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2490         if (riip[i] >= k) { /* only take upper triangular entry */
2491           ajtmp[ncols_upper] = i;
2492           ncols_upper++;
2493         }
2494       }
2495       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2496       nzk += nlnk;
2497 
2498       /* update lnk by computing fill-in for each pivot row to be merged in */
2499       prow = jl[k]; /* 1st pivot row */
2500 
2501       while (prow < k) {
2502         nextprow = jl[prow];
2503 
2504         /* merge prow into k-th row */
2505         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2506         jmax  = ui[prow+1];
2507         ncols = jmax-jmin;
2508         i     = jmin - ui[prow];
2509         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2510         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2511         j     = *(uj - 1);
2512         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2513         nzk  += nlnk;
2514 
2515         /* update il and jl for prow */
2516         if (jmin < jmax) {
2517           il[prow] = jmin;
2518           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2519         }
2520         prow = nextprow;
2521       }
2522 
2523       /* if free space is not available, make more free space */
2524       if (current_space->local_remaining<nzk) {
2525         i    = am - k + 1; /* num of unfactored rows */
2526         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2527         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2528         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2529         reallocs++;
2530       }
2531 
2532       /* copy data into free_space and free_space_lvl, then initialize lnk */
2533       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2534       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2535 
2536       /* add the k-th row into il and jl */
2537       if (nzk > 1) {
2538         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2539         jl[k] = jl[i]; jl[i] = k;
2540         il[k] = ui[k] + 1;
2541       }
2542       uj_ptr[k]     = current_space->array;
2543       uj_lvl_ptr[k] = current_space_lvl->array;
2544 
2545       current_space->array           += nzk;
2546       current_space->local_used      += nzk;
2547       current_space->local_remaining -= nzk;
2548 
2549       current_space_lvl->array           += nzk;
2550       current_space_lvl->local_used      += nzk;
2551       current_space_lvl->local_remaining -= nzk;
2552 
2553       ui[k+1] = ui[k] + nzk;
2554     }
2555 
2556     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2557     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2558     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2559     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2560 
2561     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2562     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2563     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2564     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2565     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2566 
2567   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2568 
2569   /* put together the new matrix in MATSEQSBAIJ format */
2570   b               = (Mat_SeqSBAIJ*)(fact)->data;
2571   b->singlemalloc = PETSC_FALSE;
2572 
2573   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2574 
2575   b->j             = uj;
2576   b->i             = ui;
2577   b->diag          = udiag;
2578   b->free_diag     = PETSC_TRUE;
2579   b->ilen          = 0;
2580   b->imax          = 0;
2581   b->row           = perm;
2582   b->col           = perm;
2583   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2584   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2585   b->icol          = iperm;
2586   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2587 
2588   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2589   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2590 
2591   b->maxnz   = b->nz = ui[am];
2592   b->free_a  = PETSC_TRUE;
2593   b->free_ij = PETSC_TRUE;
2594 
2595   fact->info.factor_mallocs   = reallocs;
2596   fact->info.fill_ratio_given = fill;
2597   if (ai[am] != 0) {
2598     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2599     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2600   } else {
2601     fact->info.fill_ratio_needed = 0.0;
2602   }
2603 #if defined(PETSC_USE_INFO)
2604   if (ai[am] != 0) {
2605     PetscReal af = fact->info.fill_ratio_needed;
2606     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2607     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2608     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2609   } else {
2610     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2611   }
2612 #endif
2613   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 #undef __FUNCT__
2618 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace"
2619 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2620 {
2621   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2622   Mat_SeqSBAIJ       *b;
2623   PetscErrorCode     ierr;
2624   PetscBool          perm_identity,missing;
2625   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2626   const PetscInt     *rip,*riip;
2627   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2628   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2629   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2630   PetscReal          fill          =info->fill,levels=info->levels;
2631   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2632   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2633   PetscBT            lnkbt;
2634   IS                 iperm;
2635 
2636   PetscFunctionBegin;
2637   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2638   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2639   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2640   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2641   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2642 
2643   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2644   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2645   ui[0] = 0;
2646 
2647   /* ICC(0) without matrix ordering: simply copies fill pattern */
2648   if (!levels && perm_identity) {
2649 
2650     for (i=0; i<am; i++) {
2651       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2652       udiag[i] = ui[i];
2653     }
2654     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2655     cols = uj;
2656     for (i=0; i<am; i++) {
2657       aj    = a->j + a->diag[i];
2658       ncols = ui[i+1] - ui[i];
2659       for (j=0; j<ncols; j++) *cols++ = *aj++;
2660     }
2661   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2662     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2663     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2664 
2665     /* initialization */
2666     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2667 
2668     /* jl: linked list for storing indices of the pivot rows
2669        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2670     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2671     for (i=0; i<am; i++) {
2672       jl[i] = am; il[i] = 0;
2673     }
2674 
2675     /* create and initialize a linked list for storing column indices of the active row k */
2676     nlnk = am + 1;
2677     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2678 
2679     /* initial FreeSpace size is fill*(ai[am]+1) */
2680     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2681     current_space     = free_space;
2682     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr);
2683     current_space_lvl = free_space_lvl;
2684 
2685     for (k=0; k<am; k++) {  /* for each active row k */
2686       /* initialize lnk by the column indices of row rip[k] of A */
2687       nzk   = 0;
2688       ncols = ai[rip[k]+1] - ai[rip[k]];
2689       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2690       ncols_upper = 0;
2691       for (j=0; j<ncols; j++) {
2692         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2693         if (riip[i] >= k) { /* only take upper triangular entry */
2694           ajtmp[ncols_upper] = i;
2695           ncols_upper++;
2696         }
2697       }
2698       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2699       nzk += nlnk;
2700 
2701       /* update lnk by computing fill-in for each pivot row to be merged in */
2702       prow = jl[k]; /* 1st pivot row */
2703 
2704       while (prow < k) {
2705         nextprow = jl[prow];
2706 
2707         /* merge prow into k-th row */
2708         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2709         jmax  = ui[prow+1];
2710         ncols = jmax-jmin;
2711         i     = jmin - ui[prow];
2712         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2713         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2714         j     = *(uj - 1);
2715         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2716         nzk  += nlnk;
2717 
2718         /* update il and jl for prow */
2719         if (jmin < jmax) {
2720           il[prow] = jmin;
2721           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2722         }
2723         prow = nextprow;
2724       }
2725 
2726       /* if free space is not available, make more free space */
2727       if (current_space->local_remaining<nzk) {
2728         i    = am - k + 1; /* num of unfactored rows */
2729         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2730         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2731         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2732         reallocs++;
2733       }
2734 
2735       /* copy data into free_space and free_space_lvl, then initialize lnk */
2736       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2737       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2738 
2739       /* add the k-th row into il and jl */
2740       if (nzk > 1) {
2741         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2742         jl[k] = jl[i]; jl[i] = k;
2743         il[k] = ui[k] + 1;
2744       }
2745       uj_ptr[k]     = current_space->array;
2746       uj_lvl_ptr[k] = current_space_lvl->array;
2747 
2748       current_space->array           += nzk;
2749       current_space->local_used      += nzk;
2750       current_space->local_remaining -= nzk;
2751 
2752       current_space_lvl->array           += nzk;
2753       current_space_lvl->local_used      += nzk;
2754       current_space_lvl->local_remaining -= nzk;
2755 
2756       ui[k+1] = ui[k] + nzk;
2757     }
2758 
2759 #if defined(PETSC_USE_INFO)
2760     if (ai[am] != 0) {
2761       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2762       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2763       ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2764       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2765     } else {
2766       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2767     }
2768 #endif
2769 
2770     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2771     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2772     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2773     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2774 
2775     /* destroy list of free space and other temporary array(s) */
2776     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2777     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2778     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2779     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2780 
2781   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2782 
2783   /* put together the new matrix in MATSEQSBAIJ format */
2784 
2785   b               = (Mat_SeqSBAIJ*)fact->data;
2786   b->singlemalloc = PETSC_FALSE;
2787 
2788   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2789 
2790   b->j         = uj;
2791   b->i         = ui;
2792   b->diag      = udiag;
2793   b->free_diag = PETSC_TRUE;
2794   b->ilen      = 0;
2795   b->imax      = 0;
2796   b->row       = perm;
2797   b->col       = perm;
2798 
2799   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2800   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2801 
2802   b->icol          = iperm;
2803   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2804   ierr             = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2805   ierr             = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2806   b->maxnz         = b->nz = ui[am];
2807   b->free_a        = PETSC_TRUE;
2808   b->free_ij       = PETSC_TRUE;
2809 
2810   fact->info.factor_mallocs   = reallocs;
2811   fact->info.fill_ratio_given = fill;
2812   if (ai[am] != 0) {
2813     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2814   } else {
2815     fact->info.fill_ratio_needed = 0.0;
2816   }
2817   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 #undef __FUNCT__
2822 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ"
2823 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2824 {
2825   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2826   Mat_SeqSBAIJ       *b;
2827   PetscErrorCode     ierr;
2828   PetscBool          perm_identity,missing;
2829   PetscReal          fill = info->fill;
2830   const PetscInt     *rip,*riip;
2831   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2832   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2833   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2834   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2835   PetscBT            lnkbt;
2836   IS                 iperm;
2837 
2838   PetscFunctionBegin;
2839   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2840   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2841   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2842 
2843   /* check whether perm is the identity mapping */
2844   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2845   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2846   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2847   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2848 
2849   /* initialization */
2850   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2851   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2852   ui[0] = 0;
2853 
2854   /* jl: linked list for storing indices of the pivot rows
2855      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2856   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
2857   for (i=0; i<am; i++) {
2858     jl[i] = am; il[i] = 0;
2859   }
2860 
2861   /* create and initialize a linked list for storing column indices of the active row k */
2862   nlnk = am + 1;
2863   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2864 
2865   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2866   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2867   current_space = free_space;
2868 
2869   for (k=0; k<am; k++) {  /* for each active row k */
2870     /* initialize lnk by the column indices of row rip[k] of A */
2871     nzk   = 0;
2872     ncols = ai[rip[k]+1] - ai[rip[k]];
2873     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2874     ncols_upper = 0;
2875     for (j=0; j<ncols; j++) {
2876       i = riip[*(aj + ai[rip[k]] + j)];
2877       if (i >= k) { /* only take upper triangular entry */
2878         cols[ncols_upper] = i;
2879         ncols_upper++;
2880       }
2881     }
2882     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2883     nzk += nlnk;
2884 
2885     /* update lnk by computing fill-in for each pivot row to be merged in */
2886     prow = jl[k]; /* 1st pivot row */
2887 
2888     while (prow < k) {
2889       nextprow = jl[prow];
2890       /* merge prow into k-th row */
2891       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2892       jmax   = ui[prow+1];
2893       ncols  = jmax-jmin;
2894       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2895       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2896       nzk   += nlnk;
2897 
2898       /* update il and jl for prow */
2899       if (jmin < jmax) {
2900         il[prow] = jmin;
2901         j        = *uj_ptr;
2902         jl[prow] = jl[j];
2903         jl[j]    = prow;
2904       }
2905       prow = nextprow;
2906     }
2907 
2908     /* if free space is not available, make more free space */
2909     if (current_space->local_remaining<nzk) {
2910       i    = am - k + 1; /* num of unfactored rows */
2911       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2912       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2913       reallocs++;
2914     }
2915 
2916     /* copy data into free space, then initialize lnk */
2917     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2918 
2919     /* add the k-th row into il and jl */
2920     if (nzk > 1) {
2921       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2922       jl[k] = jl[i]; jl[i] = k;
2923       il[k] = ui[k] + 1;
2924     }
2925     ui_ptr[k] = current_space->array;
2926 
2927     current_space->array           += nzk;
2928     current_space->local_used      += nzk;
2929     current_space->local_remaining -= nzk;
2930 
2931     ui[k+1] = ui[k] + nzk;
2932   }
2933 
2934   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2935   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2936   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2937 
2938   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2939   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2940   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2941   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2942 
2943   /* put together the new matrix in MATSEQSBAIJ format */
2944 
2945   b               = (Mat_SeqSBAIJ*)fact->data;
2946   b->singlemalloc = PETSC_FALSE;
2947   b->free_a       = PETSC_TRUE;
2948   b->free_ij      = PETSC_TRUE;
2949 
2950   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2951 
2952   b->j         = uj;
2953   b->i         = ui;
2954   b->diag      = udiag;
2955   b->free_diag = PETSC_TRUE;
2956   b->ilen      = 0;
2957   b->imax      = 0;
2958   b->row       = perm;
2959   b->col       = perm;
2960 
2961   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2962   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2963 
2964   b->icol          = iperm;
2965   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2966 
2967   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2968   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2969 
2970   b->maxnz = b->nz = ui[am];
2971 
2972   fact->info.factor_mallocs   = reallocs;
2973   fact->info.fill_ratio_given = fill;
2974   if (ai[am] != 0) {
2975     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2976     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2977   } else {
2978     fact->info.fill_ratio_needed = 0.0;
2979   }
2980 #if defined(PETSC_USE_INFO)
2981   if (ai[am] != 0) {
2982     PetscReal af = fact->info.fill_ratio_needed;
2983     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2984     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2985     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2986   } else {
2987     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2988   }
2989 #endif
2990   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2991   PetscFunctionReturn(0);
2992 }
2993 
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace"
2996 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2997 {
2998   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2999   Mat_SeqSBAIJ       *b;
3000   PetscErrorCode     ierr;
3001   PetscBool          perm_identity,missing;
3002   PetscReal          fill = info->fill;
3003   const PetscInt     *rip,*riip;
3004   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3005   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3006   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3007   PetscFreeSpaceList free_space=NULL,current_space=NULL;
3008   PetscBT            lnkbt;
3009   IS                 iperm;
3010 
3011   PetscFunctionBegin;
3012   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3013   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3014   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3015 
3016   /* check whether perm is the identity mapping */
3017   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
3018   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
3019   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
3020   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
3021 
3022   /* initialization */
3023   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
3024   ui[0] = 0;
3025 
3026   /* jl: linked list for storing indices of the pivot rows
3027      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3028   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
3029   for (i=0; i<am; i++) {
3030     jl[i] = am; il[i] = 0;
3031   }
3032 
3033   /* create and initialize a linked list for storing column indices of the active row k */
3034   nlnk = am + 1;
3035   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3036 
3037   /* initial FreeSpace size is fill*(ai[am]+1) */
3038   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
3039   current_space = free_space;
3040 
3041   for (k=0; k<am; k++) {  /* for each active row k */
3042     /* initialize lnk by the column indices of row rip[k] of A */
3043     nzk   = 0;
3044     ncols = ai[rip[k]+1] - ai[rip[k]];
3045     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3046     ncols_upper = 0;
3047     for (j=0; j<ncols; j++) {
3048       i = riip[*(aj + ai[rip[k]] + j)];
3049       if (i >= k) { /* only take upper triangular entry */
3050         cols[ncols_upper] = i;
3051         ncols_upper++;
3052       }
3053     }
3054     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3055     nzk += nlnk;
3056 
3057     /* update lnk by computing fill-in for each pivot row to be merged in */
3058     prow = jl[k]; /* 1st pivot row */
3059 
3060     while (prow < k) {
3061       nextprow = jl[prow];
3062       /* merge prow into k-th row */
3063       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3064       jmax   = ui[prow+1];
3065       ncols  = jmax-jmin;
3066       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3067       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3068       nzk   += nlnk;
3069 
3070       /* update il and jl for prow */
3071       if (jmin < jmax) {
3072         il[prow] = jmin;
3073         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3074       }
3075       prow = nextprow;
3076     }
3077 
3078     /* if free space is not available, make more free space */
3079     if (current_space->local_remaining<nzk) {
3080       i    = am - k + 1; /* num of unfactored rows */
3081       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3082       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3083       reallocs++;
3084     }
3085 
3086     /* copy data into free space, then initialize lnk */
3087     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3088 
3089     /* add the k-th row into il and jl */
3090     if (nzk-1 > 0) {
3091       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3092       jl[k] = jl[i]; jl[i] = k;
3093       il[k] = ui[k] + 1;
3094     }
3095     ui_ptr[k] = current_space->array;
3096 
3097     current_space->array           += nzk;
3098     current_space->local_used      += nzk;
3099     current_space->local_remaining -= nzk;
3100 
3101     ui[k+1] = ui[k] + nzk;
3102   }
3103 
3104 #if defined(PETSC_USE_INFO)
3105   if (ai[am] != 0) {
3106     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3107     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
3108     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
3109     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
3110   } else {
3111     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
3112   }
3113 #endif
3114 
3115   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3116   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3117   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3118 
3119   /* destroy list of free space and other temporary array(s) */
3120   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
3121   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3122   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3123 
3124   /* put together the new matrix in MATSEQSBAIJ format */
3125 
3126   b               = (Mat_SeqSBAIJ*)fact->data;
3127   b->singlemalloc = PETSC_FALSE;
3128   b->free_a       = PETSC_TRUE;
3129   b->free_ij      = PETSC_TRUE;
3130 
3131   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
3132 
3133   b->j    = uj;
3134   b->i    = ui;
3135   b->diag = 0;
3136   b->ilen = 0;
3137   b->imax = 0;
3138   b->row  = perm;
3139   b->col  = perm;
3140 
3141   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3142   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3143 
3144   b->icol          = iperm;
3145   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3146 
3147   ierr     = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
3148   ierr     = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3149   b->maxnz = b->nz = ui[am];
3150 
3151   fact->info.factor_mallocs   = reallocs;
3152   fact->info.fill_ratio_given = fill;
3153   if (ai[am] != 0) {
3154     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3155   } else {
3156     fact->info.fill_ratio_needed = 0.0;
3157   }
3158   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering"
3164 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3165 {
3166   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3167   PetscErrorCode    ierr;
3168   PetscInt          n   = A->rmap->n;
3169   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3170   PetscScalar       *x,sum;
3171   const PetscScalar *b;
3172   const MatScalar   *aa = a->a,*v;
3173   PetscInt          i,nz;
3174 
3175   PetscFunctionBegin;
3176   if (!n) PetscFunctionReturn(0);
3177 
3178   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3179   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3180 
3181   /* forward solve the lower triangular */
3182   x[0] = b[0];
3183   v    = aa;
3184   vi   = aj;
3185   for (i=1; i<n; i++) {
3186     nz  = ai[i+1] - ai[i];
3187     sum = b[i];
3188     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3189     v   += nz;
3190     vi  += nz;
3191     x[i] = sum;
3192   }
3193 
3194   /* backward solve the upper triangular */
3195   for (i=n-1; i>=0; i--) {
3196     v   = aa + adiag[i+1] + 1;
3197     vi  = aj + adiag[i+1] + 1;
3198     nz  = adiag[i] - adiag[i+1]-1;
3199     sum = x[i];
3200     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3201     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3202   }
3203 
3204   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3205   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3206   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "MatSolve_SeqAIJ"
3212 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3213 {
3214   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3215   IS                iscol = a->col,isrow = a->row;
3216   PetscErrorCode    ierr;
3217   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3218   const PetscInt    *rout,*cout,*r,*c;
3219   PetscScalar       *x,*tmp,sum;
3220   const PetscScalar *b;
3221   const MatScalar   *aa = a->a,*v;
3222 
3223   PetscFunctionBegin;
3224   if (!n) PetscFunctionReturn(0);
3225 
3226   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3227   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3228   tmp  = a->solve_work;
3229 
3230   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3231   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3232 
3233   /* forward solve the lower triangular */
3234   tmp[0] = b[r[0]];
3235   v      = aa;
3236   vi     = aj;
3237   for (i=1; i<n; i++) {
3238     nz  = ai[i+1] - ai[i];
3239     sum = b[r[i]];
3240     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3241     tmp[i] = sum;
3242     v     += nz; vi += nz;
3243   }
3244 
3245   /* backward solve the upper triangular */
3246   for (i=n-1; i>=0; i--) {
3247     v   = aa + adiag[i+1]+1;
3248     vi  = aj + adiag[i+1]+1;
3249     nz  = adiag[i]-adiag[i+1]-1;
3250     sum = tmp[i];
3251     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3252     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3253   }
3254 
3255   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3256   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3257   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3258   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3259   ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr);
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatILUDTFactor_SeqAIJ"
3265 /*
3266     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3267 */
3268 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3269 {
3270   Mat            B = *fact;
3271   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3272   IS             isicol;
3273   PetscErrorCode ierr;
3274   const PetscInt *r,*ic;
3275   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3276   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3277   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3278   PetscInt       nlnk,*lnk;
3279   PetscBT        lnkbt;
3280   PetscBool      row_identity,icol_identity;
3281   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3282   const PetscInt *ics;
3283   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3284   PetscReal      dt     =info->dt,shift=info->shiftamount;
3285   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3286   PetscBool      missing;
3287 
3288   PetscFunctionBegin;
3289   if (dt      == PETSC_DEFAULT) dt = 0.005;
3290   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3291 
3292   /* ------- symbolic factorization, can be reused ---------*/
3293   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3294   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3295   adiag=a->diag;
3296 
3297   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3298 
3299   /* bdiag is location of diagonal in factor */
3300   ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3301   ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */
3302 
3303   /* allocate row pointers bi */
3304   ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr);
3305 
3306   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3307   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3308   nnz_max = ai[n]+2*n*dtcount+2;
3309 
3310   ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr);
3311   ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr);
3312 
3313   /* put together the new matrix */
3314   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3315   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
3316   b    = (Mat_SeqAIJ*)B->data;
3317 
3318   b->free_a       = PETSC_TRUE;
3319   b->free_ij      = PETSC_TRUE;
3320   b->singlemalloc = PETSC_FALSE;
3321 
3322   b->a    = ba;
3323   b->j    = bj;
3324   b->i    = bi;
3325   b->diag = bdiag;
3326   b->ilen = 0;
3327   b->imax = 0;
3328   b->row  = isrow;
3329   b->col  = iscol;
3330   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3331   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3332   b->icol = isicol;
3333 
3334   ierr     = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
3335   ierr     = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3336   b->maxnz = nnz_max;
3337 
3338   B->factortype            = MAT_FACTOR_ILUDT;
3339   B->info.factor_mallocs   = 0;
3340   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3341   /* ------- end of symbolic factorization ---------*/
3342 
3343   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3344   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3345   ics  = ic;
3346 
3347   /* linked list for storing column indices of the active row */
3348   nlnk = n + 1;
3349   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3350 
3351   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3352   ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr);
3353   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3354   ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr);
3355   ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr);
3356 
3357   bi[0]        = 0;
3358   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3359   bdiag_rev[n] = bdiag[0];
3360   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3361   for (i=0; i<n; i++) {
3362     /* copy initial fill into linked list */
3363     nzi = ai[r[i]+1] - ai[r[i]];
3364     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3365     nzi_al = adiag[r[i]] - ai[r[i]];
3366     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3367     ajtmp  = aj + ai[r[i]];
3368     ierr   = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3369 
3370     /* load in initial (unfactored row) */
3371     aatmp = a->a + ai[r[i]];
3372     for (j=0; j<nzi; j++) {
3373       rtmp[ics[*ajtmp++]] = *aatmp++;
3374     }
3375 
3376     /* add pivot rows into linked list */
3377     row = lnk[n];
3378     while (row < i) {
3379       nzi_bl = bi[row+1] - bi[row] + 1;
3380       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3381       ierr   = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3382       nzi   += nlnk;
3383       row    = lnk[row];
3384     }
3385 
3386     /* copy data from lnk into jtmp, then initialize lnk */
3387     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3388 
3389     /* numerical factorization */
3390     bjtmp = jtmp;
3391     row   = *bjtmp++; /* 1st pivot row */
3392     while (row < i) {
3393       pc         = rtmp + row;
3394       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3395       multiplier = (*pc) * (*pv);
3396       *pc        = multiplier;
3397       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3398         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3399         pv = ba + bdiag[row+1] + 1;
3400         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3401         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3402         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3403         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3404       }
3405       row = *bjtmp++;
3406     }
3407 
3408     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3409     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3410     nzi_bl   = 0; j = 0;
3411     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3412       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3413       nzi_bl++; j++;
3414     }
3415     nzi_bu = nzi - nzi_bl -1;
3416     while (j < nzi) {
3417       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3418       j++;
3419     }
3420 
3421     bjtmp = bj + bi[i];
3422     batmp = ba + bi[i];
3423     /* apply level dropping rule to L part */
3424     ncut = nzi_al + dtcount;
3425     if (ncut < nzi_bl) {
3426       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3427       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3428     } else {
3429       ncut = nzi_bl;
3430     }
3431     for (j=0; j<ncut; j++) {
3432       bjtmp[j] = jtmp[j];
3433       batmp[j] = vtmp[j];
3434       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3435     }
3436     bi[i+1] = bi[i] + ncut;
3437     nzi     = ncut + 1;
3438 
3439     /* apply level dropping rule to U part */
3440     ncut = nzi_au + dtcount;
3441     if (ncut < nzi_bu) {
3442       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3443       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3444     } else {
3445       ncut = nzi_bu;
3446     }
3447     nzi += ncut;
3448 
3449     /* mark bdiagonal */
3450     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3451     bdiag_rev[n-i-1] = bdiag[i+1];
3452     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3453     bjtmp            = bj + bdiag[i];
3454     batmp            = ba + bdiag[i];
3455     *bjtmp           = i;
3456     *batmp           = diag_tmp; /* rtmp[i]; */
3457     if (*batmp == 0.0) {
3458       *batmp = dt+shift;
3459       /* printf(" row %d add shift %g\n",i,shift); */
3460     }
3461     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3462     /* printf(" (%d,%g),",*bjtmp,*batmp); */
3463 
3464     bjtmp = bj + bdiag[i+1]+1;
3465     batmp = ba + bdiag[i+1]+1;
3466     for (k=0; k<ncut; k++) {
3467       bjtmp[k] = jtmp[nzi_bl+1+k];
3468       batmp[k] = vtmp[nzi_bl+1+k];
3469       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3470     }
3471     /* printf("\n"); */
3472 
3473     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3474     /*
3475     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3476     printf(" ----------------------------\n");
3477     */
3478   } /* for (i=0; i<n; i++) */
3479     /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3480   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3481 
3482   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3483   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3484 
3485   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3486   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3487   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3488   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3489 
3490   ierr     = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3491   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3492 
3493   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3494   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3495   if (row_identity && icol_identity) {
3496     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3497   } else {
3498     B->ops->solve = MatSolve_SeqAIJ;
3499   }
3500 
3501   B->ops->solveadd          = 0;
3502   B->ops->solvetranspose    = 0;
3503   B->ops->solvetransposeadd = 0;
3504   B->ops->matsolve          = 0;
3505   B->assembled              = PETSC_TRUE;
3506   B->preallocated           = PETSC_TRUE;
3507   PetscFunctionReturn(0);
3508 }
3509 
3510 /* a wraper of MatILUDTFactor_SeqAIJ() */
3511 #undef __FUNCT__
3512 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ"
3513 /*
3514     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3515 */
3516 
3517 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3518 {
3519   PetscErrorCode ierr;
3520 
3521   PetscFunctionBegin;
3522   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3523   PetscFunctionReturn(0);
3524 }
3525 
3526 /*
3527    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3528    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3529 */
3530 #undef __FUNCT__
3531 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ"
3532 /*
3533     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3534 */
3535 
3536 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3537 {
3538   Mat            C     =fact;
3539   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3540   IS             isrow = b->row,isicol = b->icol;
3541   PetscErrorCode ierr;
3542   const PetscInt *r,*ic,*ics;
3543   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3544   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3545   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3546   PetscReal      dt=info->dt,shift=info->shiftamount;
3547   PetscBool      row_identity, col_identity;
3548 
3549   PetscFunctionBegin;
3550   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3551   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3552   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
3553   ics  = ic;
3554 
3555   for (i=0; i<n; i++) {
3556     /* initialize rtmp array */
3557     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3558     bjtmp = bj + bi[i];
3559     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3560     rtmp[i] = 0.0;
3561     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3562     bjtmp   = bj + bdiag[i+1] + 1;
3563     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3564 
3565     /* load in initial unfactored row of A */
3566     /* printf("row %d\n",i); */
3567     nz    = ai[r[i]+1] - ai[r[i]];
3568     ajtmp = aj + ai[r[i]];
3569     v     = aa + ai[r[i]];
3570     for (j=0; j<nz; j++) {
3571       rtmp[ics[*ajtmp++]] = v[j];
3572       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3573     }
3574     /* printf("\n"); */
3575 
3576     /* numerical factorization */
3577     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3578     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3579     k     = 0;
3580     while (k < nzl) {
3581       row = *bjtmp++;
3582       /* printf("  prow %d\n",row); */
3583       pc         = rtmp + row;
3584       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3585       multiplier = (*pc) * (*pv);
3586       *pc        = multiplier;
3587       if (PetscAbsScalar(multiplier) > dt) {
3588         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3589         pv = b->a + bdiag[row+1] + 1;
3590         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3591         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3592         ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr);
3593       }
3594       k++;
3595     }
3596 
3597     /* finished row so stick it into b->a */
3598     /* L-part */
3599     pv  = b->a + bi[i];
3600     pj  = bj + bi[i];
3601     nzl = bi[i+1] - bi[i];
3602     for (j=0; j<nzl; j++) {
3603       pv[j] = rtmp[pj[j]];
3604       /* printf(" (%d,%g),",pj[j],pv[j]); */
3605     }
3606 
3607     /* diagonal: invert diagonal entries for simplier triangular solves */
3608     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3609     b->a[bdiag[i]] = 1.0/rtmp[i];
3610     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3611 
3612     /* U-part */
3613     pv  = b->a + bdiag[i+1] + 1;
3614     pj  = bj + bdiag[i+1] + 1;
3615     nzu = bdiag[i] - bdiag[i+1] - 1;
3616     for (j=0; j<nzu; j++) {
3617       pv[j] = rtmp[pj[j]];
3618       /* printf(" (%d,%g),",pj[j],pv[j]); */
3619     }
3620     /* printf("\n"); */
3621   }
3622 
3623   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3624   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3625   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3626 
3627   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3628   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3629   if (row_identity && col_identity) {
3630     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3631   } else {
3632     C->ops->solve = MatSolve_SeqAIJ;
3633   }
3634   C->ops->solveadd          = 0;
3635   C->ops->solvetranspose    = 0;
3636   C->ops->solvetransposeadd = 0;
3637   C->ops->matsolve          = 0;
3638   C->assembled              = PETSC_TRUE;
3639   C->preallocated           = PETSC_TRUE;
3640 
3641   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3642   PetscFunctionReturn(0);
3643 }
3644