xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 76be6f4ff3bd4e251c19fc00ebbebfd58b6e7589)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
15   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal         best,past = 0,future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i=0; i<n; i++) {
25     future = 0.0;
26     for (j=ai[i]; j<ai[i+1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else              past  = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past/future > best) {
32       best    = past/future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n,&done));
38   PetscCall(PetscArrayzero(done,n));
39   PetscCall(PetscMalloc1(n,&order));
40   order[0] = current;
41   for (i=0; i<n-1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j=ai[current]; j<ai[current+1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k=ai[jj]; k<ai[jj+1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past/future > best) {
57         best       = past/future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k=0; k<n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j=ai[k]; j<ai[k+1]; j++) {
68           kk = aj[j];
69           if (done[kk])       past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past/future > best) {
74           best       = past/future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent,PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
80     current    = newcurrent;
81     order[i+1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(0);
89 }
90 
91 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type)
92 {
93   PetscFunctionBegin;
94   *type = MATSOLVERPETSC;
95   PetscFunctionReturn(0);
96 }
97 
98 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
99 {
100   PetscInt       n = A->rmap->n;
101 
102   PetscFunctionBegin;
103 #if defined(PETSC_USE_COMPLEX)
104   PetscCheck(!A->hermitian || A->symmetric || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC),PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported");
105 #endif
106   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B));
107   PetscCall(MatSetSizes(*B,n,n,n,n));
108   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
109     PetscCall(MatSetType(*B,MATSEQAIJ));
110 
111     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
112     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
113 
114     PetscCall(MatSetBlockSizesFromMats(*B,A,A));
115     PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU]));
116     PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU]));
117     PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
118   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
119     PetscCall(MatSetType(*B,MATSEQSBAIJ));
120     PetscCall(MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL));
121 
122     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
123     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
124     PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
125     PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC]));
126   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
127   (*B)->factortype = ftype;
128 
129   PetscCall(PetscFree((*B)->solvertype));
130   PetscCall(PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype));
131   (*B)->canuseordering = PETSC_TRUE;
132   PetscCall(PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc));
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137 {
138   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
139   IS                 isicol;
140   const PetscInt     *r,*ic;
141   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
142   PetscInt           *bi,*bj,*ajtmp;
143   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
144   PetscReal          f;
145   PetscInt           nlnk,*lnk,k,**bi_ptr;
146   PetscFreeSpaceList free_space=NULL,current_space=NULL;
147   PetscBT            lnkbt;
148   PetscBool          missing;
149 
150   PetscFunctionBegin;
151   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
152   PetscCall(MatMissingDiagonal(A,&missing,&i));
153   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
154 
155   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
156   PetscCall(ISGetIndices(isrow,&r));
157   PetscCall(ISGetIndices(isicol,&ic));
158 
159   /* get new row pointers */
160   PetscCall(PetscMalloc1(n+1,&bi));
161   bi[0] = 0;
162 
163   /* bdiag is location of diagonal in factor */
164   PetscCall(PetscMalloc1(n+1,&bdiag));
165   bdiag[0] = 0;
166 
167   /* linked list for storing column indices of the active row */
168   nlnk = n + 1;
169   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
170 
171   PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im));
172 
173   /* initial FreeSpace size is f*(ai[n]+1) */
174   f             = info->fill;
175   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
176   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
177   current_space = free_space;
178 
179   for (i=0; i<n; i++) {
180     /* copy previous fill into linked list */
181     nzi = 0;
182     nnz = ai[r[i]+1] - ai[r[i]];
183     ajtmp = aj + ai[r[i]];
184     PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt));
185     nzi  += nlnk;
186 
187     /* add pivot rows into linked list */
188     row = lnk[n];
189     while (row < i) {
190       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
191       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
192       PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im));
193       nzi  += nlnk;
194       row   = lnk[row];
195     }
196     bi[i+1] = bi[i] + nzi;
197     im[i]   = nzi;
198 
199     /* mark bdiag */
200     nzbd = 0;
201     nnz  = nzi;
202     k    = lnk[n];
203     while (nnz-- && k < i) {
204       nzbd++;
205       k = lnk[k];
206     }
207     bdiag[i] = bi[i] + nzbd;
208 
209     /* if free space is not available, make more free space */
210     if (current_space->local_remaining<nzi) {
211       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
212       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
213       reallocs++;
214     }
215 
216     /* copy data into free space, then initialize lnk */
217     PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt));
218 
219     bi_ptr[i]                       = current_space->array;
220     current_space->array           += nzi;
221     current_space->local_used      += nzi;
222     current_space->local_remaining -= nzi;
223   }
224 #if defined(PETSC_USE_INFO)
225   if (ai[n] != 0) {
226     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
227     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
228     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
229     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
230     PetscCall(PetscInfo(A,"for best performance.\n"));
231   } else {
232     PetscCall(PetscInfo(A,"Empty matrix\n"));
233   }
234 #endif
235 
236   PetscCall(ISRestoreIndices(isrow,&r));
237   PetscCall(ISRestoreIndices(isicol,&ic));
238 
239   /* destroy list of free space and other temporary array(s) */
240   PetscCall(PetscMalloc1(bi[n]+1,&bj));
241   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
242   PetscCall(PetscLLDestroy(lnk,lnkbt));
243   PetscCall(PetscFree2(bi_ptr,im));
244 
245   /* put together the new matrix */
246   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL));
247   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
248   b    = (Mat_SeqAIJ*)(B)->data;
249 
250   b->free_a       = PETSC_TRUE;
251   b->free_ij      = PETSC_TRUE;
252   b->singlemalloc = PETSC_FALSE;
253 
254   PetscCall(PetscMalloc1(bi[n]+1,&b->a));
255   b->j    = bj;
256   b->i    = bi;
257   b->diag = bdiag;
258   b->ilen = NULL;
259   b->imax = NULL;
260   b->row  = isrow;
261   b->col  = iscol;
262   PetscCall(PetscObjectReference((PetscObject)isrow));
263   PetscCall(PetscObjectReference((PetscObject)iscol));
264   b->icol = isicol;
265   PetscCall(PetscMalloc1(n+1,&b->solve_work));
266 
267   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
268   PetscCall(PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))));
269   b->maxnz = b->nz = bi[n];
270 
271   (B)->factortype            = MAT_FACTOR_LU;
272   (B)->info.factor_mallocs   = reallocs;
273   (B)->info.fill_ratio_given = f;
274 
275   if (ai[n]) {
276     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
277   } else {
278     (B)->info.fill_ratio_needed = 0.0;
279   }
280   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
281   if (a->inode.size) {
282     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
283   }
284   PetscFunctionReturn(0);
285 }
286 
287 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
288 {
289   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
290   IS                 isicol;
291   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
292   PetscInt           i,n=A->rmap->n;
293   PetscInt           *bi,*bj;
294   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
295   PetscReal          f;
296   PetscInt           nlnk,*lnk,k,**bi_ptr;
297   PetscFreeSpaceList free_space=NULL,current_space=NULL;
298   PetscBT            lnkbt;
299   PetscBool          missing;
300 
301   PetscFunctionBegin;
302   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
303   PetscCall(MatMissingDiagonal(A,&missing,&i));
304   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
305 
306   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
307   PetscCall(ISGetIndices(isrow,&r));
308   PetscCall(ISGetIndices(isicol,&ic));
309 
310   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
311   PetscCall(PetscMalloc1(n+1,&bi));
312   PetscCall(PetscMalloc1(n+1,&bdiag));
313   bi[0] = bdiag[0] = 0;
314 
315   /* linked list for storing column indices of the active row */
316   nlnk = n + 1;
317   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
318 
319   PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im));
320 
321   /* initial FreeSpace size is f*(ai[n]+1) */
322   f             = info->fill;
323   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
324   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
325   current_space = free_space;
326 
327   for (i=0; i<n; i++) {
328     /* copy previous fill into linked list */
329     nzi = 0;
330     nnz = ai[r[i]+1] - ai[r[i]];
331     ajtmp = aj + ai[r[i]];
332     PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt));
333     nzi  += nlnk;
334 
335     /* add pivot rows into linked list */
336     row = lnk[n];
337     while (row < i) {
338       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
339       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
340       PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im));
341       nzi  += nlnk;
342       row   = lnk[row];
343     }
344     bi[i+1] = bi[i] + nzi;
345     im[i]   = nzi;
346 
347     /* mark bdiag */
348     nzbd = 0;
349     nnz  = nzi;
350     k    = lnk[n];
351     while (nnz-- && k < i) {
352       nzbd++;
353       k = lnk[k];
354     }
355     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
356 
357     /* if free space is not available, make more free space */
358     if (current_space->local_remaining<nzi) {
359       /* estimated additional space needed */
360       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
361       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
362       reallocs++;
363     }
364 
365     /* copy data into free space, then initialize lnk */
366     PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt));
367 
368     bi_ptr[i]                       = current_space->array;
369     current_space->array           += nzi;
370     current_space->local_used      += nzi;
371     current_space->local_remaining -= nzi;
372   }
373 
374   PetscCall(ISRestoreIndices(isrow,&r));
375   PetscCall(ISRestoreIndices(isicol,&ic));
376 
377   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
378   PetscCall(PetscMalloc1(bi[n]+1,&bj));
379   PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag));
380   PetscCall(PetscLLDestroy(lnk,lnkbt));
381   PetscCall(PetscFree2(bi_ptr,im));
382 
383   /* put together the new matrix */
384   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL));
385   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
386   b    = (Mat_SeqAIJ*)(B)->data;
387 
388   b->free_a       = PETSC_TRUE;
389   b->free_ij      = PETSC_TRUE;
390   b->singlemalloc = PETSC_FALSE;
391 
392   PetscCall(PetscMalloc1(bdiag[0]+1,&b->a));
393 
394   b->j    = bj;
395   b->i    = bi;
396   b->diag = bdiag;
397   b->ilen = NULL;
398   b->imax = NULL;
399   b->row  = isrow;
400   b->col  = iscol;
401   PetscCall(PetscObjectReference((PetscObject)isrow));
402   PetscCall(PetscObjectReference((PetscObject)iscol));
403   b->icol = isicol;
404   PetscCall(PetscMalloc1(n+1,&b->solve_work));
405 
406   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
407   PetscCall(PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar))));
408   b->maxnz = b->nz = bdiag[0]+1;
409 
410   B->factortype            = MAT_FACTOR_LU;
411   B->info.factor_mallocs   = reallocs;
412   B->info.fill_ratio_given = f;
413 
414   if (ai[n]) {
415     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
416   } else {
417     B->info.fill_ratio_needed = 0.0;
418   }
419 #if defined(PETSC_USE_INFO)
420   if (ai[n] != 0) {
421     PetscReal af = B->info.fill_ratio_needed;
422     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
423     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
424     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
425     PetscCall(PetscInfo(A,"for best performance.\n"));
426   } else {
427     PetscCall(PetscInfo(A,"Empty matrix\n"));
428   }
429 #endif
430   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
431   if (a->inode.size) {
432     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
433   }
434   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
435   PetscFunctionReturn(0);
436 }
437 
438 /*
439     Trouble in factorization, should we dump the original matrix?
440 */
441 PetscErrorCode MatFactorDumpMatrix(Mat A)
442 {
443   PetscBool      flg = PETSC_FALSE;
444 
445   PetscFunctionBegin;
446   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL));
447   if (flg) {
448     PetscViewer viewer;
449     char        filename[PETSC_MAX_PATH_LEN];
450 
451     PetscCall(PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank));
452     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer));
453     PetscCall(MatView(A,viewer));
454     PetscCall(PetscViewerDestroy(&viewer));
455   }
456   PetscFunctionReturn(0);
457 }
458 
459 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
460 {
461   Mat             C     =B;
462   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
463   IS              isrow = b->row,isicol = b->icol;
464   const PetscInt  *r,*ic,*ics;
465   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
466   PetscInt        i,j,k,nz,nzL,row,*pj;
467   const PetscInt  *ajtmp,*bjtmp;
468   MatScalar       *rtmp,*pc,multiplier,*pv;
469   const MatScalar *aa=a->a,*v;
470   PetscBool       row_identity,col_identity;
471   FactorShiftCtx  sctx;
472   const PetscInt  *ddiag;
473   PetscReal       rs;
474   MatScalar       d;
475 
476   PetscFunctionBegin;
477   /* MatPivotSetUp(): initialize shift context sctx */
478   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
479 
480   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
481     ddiag          = a->diag;
482     sctx.shift_top = info->zeropivot;
483     for (i=0; i<n; i++) {
484       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
485       d  = (aa)[ddiag[i]];
486       rs = -PetscAbsScalar(d) - PetscRealPart(d);
487       v  = aa+ai[i];
488       nz = ai[i+1] - ai[i];
489       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
490       if (rs>sctx.shift_top) sctx.shift_top = rs;
491     }
492     sctx.shift_top *= 1.1;
493     sctx.nshift_max = 5;
494     sctx.shift_lo   = 0.;
495     sctx.shift_hi   = 1.;
496   }
497 
498   PetscCall(ISGetIndices(isrow,&r));
499   PetscCall(ISGetIndices(isicol,&ic));
500   PetscCall(PetscMalloc1(n+1,&rtmp));
501   ics  = ic;
502 
503   do {
504     sctx.newshift = PETSC_FALSE;
505     for (i=0; i<n; i++) {
506       /* zero rtmp */
507       /* L part */
508       nz    = bi[i+1] - bi[i];
509       bjtmp = bj + bi[i];
510       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
511 
512       /* U part */
513       nz    = bdiag[i]-bdiag[i+1];
514       bjtmp = bj + bdiag[i+1]+1;
515       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
516 
517       /* load in initial (unfactored row) */
518       nz    = ai[r[i]+1] - ai[r[i]];
519       ajtmp = aj + ai[r[i]];
520       v     = aa + ai[r[i]];
521       for (j=0; j<nz; j++) {
522         rtmp[ics[ajtmp[j]]] = v[j];
523       }
524       /* ZeropivotApply() */
525       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
526 
527       /* elimination */
528       bjtmp = bj + bi[i];
529       row   = *bjtmp++;
530       nzL   = bi[i+1] - bi[i];
531       for (k=0; k < nzL; k++) {
532         pc = rtmp + row;
533         if (*pc != 0.0) {
534           pv         = b->a + bdiag[row];
535           multiplier = *pc * (*pv);
536           *pc        = multiplier;
537 
538           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
539           pv = b->a + bdiag[row+1]+1;
540           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
541 
542           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
543           PetscCall(PetscLogFlops(1+2.0*nz));
544         }
545         row = *bjtmp++;
546       }
547 
548       /* finished row so stick it into b->a */
549       rs = 0.0;
550       /* L part */
551       pv = b->a + bi[i];
552       pj = b->j + bi[i];
553       nz = bi[i+1] - bi[i];
554       for (j=0; j<nz; j++) {
555         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
556       }
557 
558       /* U part */
559       pv = b->a + bdiag[i+1]+1;
560       pj = b->j + bdiag[i+1]+1;
561       nz = bdiag[i] - bdiag[i+1]-1;
562       for (j=0; j<nz; j++) {
563         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
564       }
565 
566       sctx.rs = rs;
567       sctx.pv = rtmp[i];
568       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
569       if (sctx.newshift) break; /* break for-loop */
570       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
571 
572       /* Mark diagonal and invert diagonal for simpler triangular solves */
573       pv  = b->a + bdiag[i];
574       *pv = 1.0/rtmp[i];
575 
576     } /* endof for (i=0; i<n; i++) { */
577 
578     /* MatPivotRefine() */
579     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
580       /*
581        * if no shift in this attempt & shifting & started shifting & can refine,
582        * then try lower shift
583        */
584       sctx.shift_hi       = sctx.shift_fraction;
585       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
586       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
587       sctx.newshift       = PETSC_TRUE;
588       sctx.nshift++;
589     }
590   } while (sctx.newshift);
591 
592   PetscCall(PetscFree(rtmp));
593   PetscCall(ISRestoreIndices(isicol,&ic));
594   PetscCall(ISRestoreIndices(isrow,&r));
595 
596   PetscCall(ISIdentity(isrow,&row_identity));
597   PetscCall(ISIdentity(isicol,&col_identity));
598   if (b->inode.size) {
599     C->ops->solve = MatSolve_SeqAIJ_Inode;
600   } else if (row_identity && col_identity) {
601     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
602   } else {
603     C->ops->solve = MatSolve_SeqAIJ;
604   }
605   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
606   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
607   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
608   C->ops->matsolve          = MatMatSolve_SeqAIJ;
609   C->assembled              = PETSC_TRUE;
610   C->preallocated           = PETSC_TRUE;
611 
612   PetscCall(PetscLogFlops(C->cmap->n));
613 
614   /* MatShiftView(A,info,&sctx) */
615   if (sctx.nshift) {
616     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
617       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
618     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
619       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
620     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
621       PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount));
622     }
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
628 {
629   Mat             C     =B;
630   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
631   IS              isrow = b->row,isicol = b->icol;
632   const PetscInt  *r,*ic,*ics;
633   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
634   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
635   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
636   MatScalar       *pv,*rtmp,*pc,multiplier,d;
637   const MatScalar *v,*aa=a->a;
638   PetscReal       rs=0.0;
639   FactorShiftCtx  sctx;
640   const PetscInt  *ddiag;
641   PetscBool       row_identity, col_identity;
642 
643   PetscFunctionBegin;
644   /* MatPivotSetUp(): initialize shift context sctx */
645   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
646 
647   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648     ddiag          = a->diag;
649     sctx.shift_top = info->zeropivot;
650     for (i=0; i<n; i++) {
651       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652       d  = (aa)[ddiag[i]];
653       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654       v  = aa+ai[i];
655       nz = ai[i+1] - ai[i];
656       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
657       if (rs>sctx.shift_top) sctx.shift_top = rs;
658     }
659     sctx.shift_top *= 1.1;
660     sctx.nshift_max = 5;
661     sctx.shift_lo   = 0.;
662     sctx.shift_hi   = 1.;
663   }
664 
665   PetscCall(ISGetIndices(isrow,&r));
666   PetscCall(ISGetIndices(isicol,&ic));
667   PetscCall(PetscMalloc1(n+1,&rtmp));
668   ics  = ic;
669 
670   do {
671     sctx.newshift = PETSC_FALSE;
672     for (i=0; i<n; i++) {
673       nz    = bi[i+1] - bi[i];
674       bjtmp = bj + bi[i];
675       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
676 
677       /* load in initial (unfactored row) */
678       nz    = ai[r[i]+1] - ai[r[i]];
679       ajtmp = aj + ai[r[i]];
680       v     = aa + ai[r[i]];
681       for (j=0; j<nz; j++) {
682         rtmp[ics[ajtmp[j]]] = v[j];
683       }
684       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
685 
686       row = *bjtmp++;
687       while  (row < i) {
688         pc = rtmp + row;
689         if (*pc != 0.0) {
690           pv         = b->a + diag_offset[row];
691           pj         = b->j + diag_offset[row] + 1;
692           multiplier = *pc / *pv++;
693           *pc        = multiplier;
694           nz         = bi[row+1] - diag_offset[row] - 1;
695           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
696           PetscCall(PetscLogFlops(1+2.0*nz));
697         }
698         row = *bjtmp++;
699       }
700       /* finished row so stick it into b->a */
701       pv   = b->a + bi[i];
702       pj   = b->j + bi[i];
703       nz   = bi[i+1] - bi[i];
704       diag = diag_offset[i] - bi[i];
705       rs   = 0.0;
706       for (j=0; j<nz; j++) {
707         pv[j] = rtmp[pj[j]];
708         rs   += PetscAbsScalar(pv[j]);
709       }
710       rs -= PetscAbsScalar(pv[diag]);
711 
712       sctx.rs = rs;
713       sctx.pv = pv[diag];
714       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
715       if (sctx.newshift) break;
716       pv[diag] = sctx.pv;
717     }
718 
719     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
720       /*
721        * if no shift in this attempt & shifting & started shifting & can refine,
722        * then try lower shift
723        */
724       sctx.shift_hi       = sctx.shift_fraction;
725       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
726       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
727       sctx.newshift       = PETSC_TRUE;
728       sctx.nshift++;
729     }
730   } while (sctx.newshift);
731 
732   /* invert diagonal entries for simpler triangular solves */
733   for (i=0; i<n; i++) {
734     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
735   }
736   PetscCall(PetscFree(rtmp));
737   PetscCall(ISRestoreIndices(isicol,&ic));
738   PetscCall(ISRestoreIndices(isrow,&r));
739 
740   PetscCall(ISIdentity(isrow,&row_identity));
741   PetscCall(ISIdentity(isicol,&col_identity));
742   if (row_identity && col_identity) {
743     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
744   } else {
745     C->ops->solve = MatSolve_SeqAIJ_inplace;
746   }
747   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
748   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
749   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
750   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
751 
752   C->assembled    = PETSC_TRUE;
753   C->preallocated = PETSC_TRUE;
754 
755   PetscCall(PetscLogFlops(C->cmap->n));
756   if (sctx.nshift) {
757     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
758       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
759     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
760       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
761     }
762   }
763   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
764   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
765 
766   PetscCall(MatSeqAIJCheckInode(C));
767   PetscFunctionReturn(0);
768 }
769 
770 /*
771    This routine implements inplace ILU(0) with row or/and column permutations.
772    Input:
773      A - original matrix
774    Output;
775      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
776          a->j (col index) is permuted by the inverse of colperm, then sorted
777          a->a reordered accordingly with a->j
778          a->diag (ptr to diagonal elements) is updated.
779 */
780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
781 {
782   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
783   IS              isrow = a->row,isicol = a->icol;
784   const PetscInt  *r,*ic,*ics;
785   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
786   PetscInt        *ajtmp,nz,row;
787   PetscInt        *diag = a->diag,nbdiag,*pj;
788   PetscScalar     *rtmp,*pc,multiplier,d;
789   MatScalar       *pv,*v;
790   PetscReal       rs;
791   FactorShiftCtx  sctx;
792   const MatScalar *aa=a->a,*vtmp;
793 
794   PetscFunctionBegin;
795   PetscCheck(A == B,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
796 
797   /* MatPivotSetUp(): initialize shift context sctx */
798   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
799 
800   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
801     const PetscInt *ddiag = a->diag;
802     sctx.shift_top = info->zeropivot;
803     for (i=0; i<n; i++) {
804       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
805       d    = (aa)[ddiag[i]];
806       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
807       vtmp = aa+ai[i];
808       nz   = ai[i+1] - ai[i];
809       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
810       if (rs>sctx.shift_top) sctx.shift_top = rs;
811     }
812     sctx.shift_top *= 1.1;
813     sctx.nshift_max = 5;
814     sctx.shift_lo   = 0.;
815     sctx.shift_hi   = 1.;
816   }
817 
818   PetscCall(ISGetIndices(isrow,&r));
819   PetscCall(ISGetIndices(isicol,&ic));
820   PetscCall(PetscMalloc1(n+1,&rtmp));
821   PetscCall(PetscArrayzero(rtmp,n+1));
822   ics  = ic;
823 
824 #if defined(MV)
825   sctx.shift_top      = 0.;
826   sctx.nshift_max     = 0;
827   sctx.shift_lo       = 0.;
828   sctx.shift_hi       = 0.;
829   sctx.shift_fraction = 0.;
830 
831   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
832     sctx.shift_top = 0.;
833     for (i=0; i<n; i++) {
834       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
835       d  = (a->a)[diag[i]];
836       rs = -PetscAbsScalar(d) - PetscRealPart(d);
837       v  = a->a+ai[i];
838       nz = ai[i+1] - ai[i];
839       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
840       if (rs>sctx.shift_top) sctx.shift_top = rs;
841     }
842     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
843     sctx.shift_top *= 1.1;
844     sctx.nshift_max = 5;
845     sctx.shift_lo   = 0.;
846     sctx.shift_hi   = 1.;
847   }
848 
849   sctx.shift_amount = 0.;
850   sctx.nshift       = 0;
851 #endif
852 
853   do {
854     sctx.newshift = PETSC_FALSE;
855     for (i=0; i<n; i++) {
856       /* load in initial unfactored row */
857       nz    = ai[r[i]+1] - ai[r[i]];
858       ajtmp = aj + ai[r[i]];
859       v     = a->a + ai[r[i]];
860       /* sort permuted ajtmp and values v accordingly */
861       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
862       PetscCall(PetscSortIntWithScalarArray(nz,ajtmp,v));
863 
864       diag[r[i]] = ai[r[i]];
865       for (j=0; j<nz; j++) {
866         rtmp[ajtmp[j]] = v[j];
867         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
868       }
869       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
870 
871       row = *ajtmp++;
872       while  (row < i) {
873         pc = rtmp + row;
874         if (*pc != 0.0) {
875           pv = a->a + diag[r[row]];
876           pj = aj + diag[r[row]] + 1;
877 
878           multiplier = *pc / *pv++;
879           *pc        = multiplier;
880           nz         = ai[r[row]+1] - diag[r[row]] - 1;
881           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
882           PetscCall(PetscLogFlops(1+2.0*nz));
883         }
884         row = *ajtmp++;
885       }
886       /* finished row so overwrite it onto a->a */
887       pv     = a->a + ai[r[i]];
888       pj     = aj + ai[r[i]];
889       nz     = ai[r[i]+1] - ai[r[i]];
890       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
891 
892       rs = 0.0;
893       for (j=0; j<nz; j++) {
894         pv[j] = rtmp[pj[j]];
895         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
896       }
897 
898       sctx.rs = rs;
899       sctx.pv = pv[nbdiag];
900       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
901       if (sctx.newshift) break;
902       pv[nbdiag] = sctx.pv;
903     }
904 
905     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
906       /*
907        * if no shift in this attempt & shifting & started shifting & can refine,
908        * then try lower shift
909        */
910       sctx.shift_hi       = sctx.shift_fraction;
911       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
912       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
913       sctx.newshift       = PETSC_TRUE;
914       sctx.nshift++;
915     }
916   } while (sctx.newshift);
917 
918   /* invert diagonal entries for simpler triangular solves */
919   for (i=0; i<n; i++) {
920     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
921   }
922 
923   PetscCall(PetscFree(rtmp));
924   PetscCall(ISRestoreIndices(isicol,&ic));
925   PetscCall(ISRestoreIndices(isrow,&r));
926 
927   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
928   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
929   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
930   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
931 
932   A->assembled    = PETSC_TRUE;
933   A->preallocated = PETSC_TRUE;
934 
935   PetscCall(PetscLogFlops(A->cmap->n));
936   if (sctx.nshift) {
937     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
938       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
939     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
940       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
941     }
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 /* ----------------------------------------------------------- */
947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
948 {
949   Mat            C;
950 
951   PetscFunctionBegin;
952   PetscCall(MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C));
953   PetscCall(MatLUFactorSymbolic(C,A,row,col,info));
954   PetscCall(MatLUFactorNumeric(C,A,info));
955 
956   A->ops->solve          = C->ops->solve;
957   A->ops->solvetranspose = C->ops->solvetranspose;
958 
959   PetscCall(MatHeaderMerge(A,&C));
960   PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol));
961   PetscFunctionReturn(0);
962 }
963 /* ----------------------------------------------------------- */
964 
965 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
966 {
967   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
968   IS                iscol = a->col,isrow = a->row;
969   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
970   PetscInt          nz;
971   const PetscInt    *rout,*cout,*r,*c;
972   PetscScalar       *x,*tmp,*tmps,sum;
973   const PetscScalar *b;
974   const MatScalar   *aa = a->a,*v;
975 
976   PetscFunctionBegin;
977   if (!n) PetscFunctionReturn(0);
978 
979   PetscCall(VecGetArrayRead(bb,&b));
980   PetscCall(VecGetArrayWrite(xx,&x));
981   tmp  = a->solve_work;
982 
983   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
984   PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1);
985 
986   /* forward solve the lower triangular */
987   tmp[0] = b[*r++];
988   tmps   = tmp;
989   for (i=1; i<n; i++) {
990     v   = aa + ai[i];
991     vi  = aj + ai[i];
992     nz  = a->diag[i] - ai[i];
993     sum = b[*r++];
994     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
995     tmp[i] = sum;
996   }
997 
998   /* backward solve the upper triangular */
999   for (i=n-1; i>=0; i--) {
1000     v   = aa + a->diag[i] + 1;
1001     vi  = aj + a->diag[i] + 1;
1002     nz  = ai[i+1] - a->diag[i] - 1;
1003     sum = tmp[i];
1004     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1005     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1006   }
1007 
1008   PetscCall(ISRestoreIndices(isrow,&rout));
1009   PetscCall(ISRestoreIndices(iscol,&cout));
1010   PetscCall(VecRestoreArrayRead(bb,&b));
1011   PetscCall(VecRestoreArrayWrite(xx,&x));
1012   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1017 {
1018   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1019   IS                iscol = a->col,isrow = a->row;
1020   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1021   PetscInt          nz,neq,ldb,ldx;
1022   const PetscInt    *rout,*cout,*r,*c;
1023   PetscScalar       *x,*tmp = a->solve_work,*tmps,sum;
1024   const PetscScalar *b,*aa = a->a,*v;
1025   PetscBool         isdense;
1026 
1027   PetscFunctionBegin;
1028   if (!n) PetscFunctionReturn(0);
1029   PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense));
1030   PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1031   if (X != B) {
1032     PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense));
1033     PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1034   }
1035   PetscCall(MatDenseGetArrayRead(B,&b));
1036   PetscCall(MatDenseGetLDA(B,&ldb));
1037   PetscCall(MatDenseGetArray(X,&x));
1038   PetscCall(MatDenseGetLDA(X,&ldx));
1039   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1040   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1041   for (neq=0; neq<B->cmap->n; neq++) {
1042     /* forward solve the lower triangular */
1043     tmp[0] = b[r[0]];
1044     tmps   = tmp;
1045     for (i=1; i<n; i++) {
1046       v   = aa + ai[i];
1047       vi  = aj + ai[i];
1048       nz  = a->diag[i] - ai[i];
1049       sum = b[r[i]];
1050       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1051       tmp[i] = sum;
1052     }
1053     /* backward solve the upper triangular */
1054     for (i=n-1; i>=0; i--) {
1055       v   = aa + a->diag[i] + 1;
1056       vi  = aj + a->diag[i] + 1;
1057       nz  = ai[i+1] - a->diag[i] - 1;
1058       sum = tmp[i];
1059       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1061     }
1062     b += ldb;
1063     x += ldx;
1064   }
1065   PetscCall(ISRestoreIndices(isrow,&rout));
1066   PetscCall(ISRestoreIndices(iscol,&cout));
1067   PetscCall(MatDenseRestoreArrayRead(B,&b));
1068   PetscCall(MatDenseRestoreArray(X,&x));
1069   PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n)));
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1074 {
1075   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1076   IS                iscol = a->col,isrow = a->row;
1077   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1078   PetscInt          nz,neq,ldb,ldx;
1079   const PetscInt    *rout,*cout,*r,*c;
1080   PetscScalar       *x,*tmp = a->solve_work,sum;
1081   const PetscScalar *b,*aa = a->a,*v;
1082   PetscBool         isdense;
1083 
1084   PetscFunctionBegin;
1085   if (!n) PetscFunctionReturn(0);
1086   PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense));
1087   PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1088   if (X != B) {
1089     PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense));
1090     PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1091   }
1092   PetscCall(MatDenseGetArrayRead(B,&b));
1093   PetscCall(MatDenseGetLDA(B,&ldb));
1094   PetscCall(MatDenseGetArray(X,&x));
1095   PetscCall(MatDenseGetLDA(X,&ldx));
1096   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1097   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1098   for (neq=0; neq<B->cmap->n; neq++) {
1099     /* forward solve the lower triangular */
1100     tmp[0] = b[r[0]];
1101     v      = aa;
1102     vi     = aj;
1103     for (i=1; i<n; i++) {
1104       nz  = ai[i+1] - ai[i];
1105       sum = b[r[i]];
1106       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1107       tmp[i] = sum;
1108       v     += nz; vi += nz;
1109     }
1110     /* backward solve the upper triangular */
1111     for (i=n-1; i>=0; i--) {
1112       v   = aa + adiag[i+1]+1;
1113       vi  = aj + adiag[i+1]+1;
1114       nz  = adiag[i]-adiag[i+1]-1;
1115       sum = tmp[i];
1116       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1117       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1118     }
1119     b += ldb;
1120     x += ldx;
1121   }
1122   PetscCall(ISRestoreIndices(isrow,&rout));
1123   PetscCall(ISRestoreIndices(iscol,&cout));
1124   PetscCall(MatDenseRestoreArrayRead(B,&b));
1125   PetscCall(MatDenseRestoreArray(X,&x));
1126   PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n)));
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1131 {
1132   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1133   IS                iscol = a->col,isrow = a->row;
1134   const PetscInt    *r,*c,*rout,*cout;
1135   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1136   PetscInt          nz,row;
1137   PetscScalar       *x,*tmp,*tmps,sum;
1138   const PetscScalar *b;
1139   const MatScalar   *aa = a->a,*v;
1140 
1141   PetscFunctionBegin;
1142   if (!n) PetscFunctionReturn(0);
1143 
1144   PetscCall(VecGetArrayRead(bb,&b));
1145   PetscCall(VecGetArrayWrite(xx,&x));
1146   tmp  = a->solve_work;
1147 
1148   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1149   PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1);
1150 
1151   /* forward solve the lower triangular */
1152   tmp[0] = b[*r++];
1153   tmps   = tmp;
1154   for (row=1; row<n; row++) {
1155     i   = rout[row]; /* permuted row */
1156     v   = aa + ai[i];
1157     vi  = aj + ai[i];
1158     nz  = a->diag[i] - ai[i];
1159     sum = b[*r++];
1160     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1161     tmp[row] = sum;
1162   }
1163 
1164   /* backward solve the upper triangular */
1165   for (row=n-1; row>=0; row--) {
1166     i   = rout[row]; /* permuted row */
1167     v   = aa + a->diag[i] + 1;
1168     vi  = aj + a->diag[i] + 1;
1169     nz  = ai[i+1] - a->diag[i] - 1;
1170     sum = tmp[row];
1171     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1172     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1173   }
1174 
1175   PetscCall(ISRestoreIndices(isrow,&rout));
1176   PetscCall(ISRestoreIndices(iscol,&cout));
1177   PetscCall(VecRestoreArrayRead(bb,&b));
1178   PetscCall(VecRestoreArrayWrite(xx,&x));
1179   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 /* ----------------------------------------------------------- */
1184 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1185 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1186 {
1187   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1188   PetscInt          n   = A->rmap->n;
1189   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1190   PetscScalar       *x;
1191   const PetscScalar *b;
1192   const MatScalar   *aa = a->a;
1193 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1194   PetscInt        adiag_i,i,nz,ai_i;
1195   const PetscInt  *vi;
1196   const MatScalar *v;
1197   PetscScalar     sum;
1198 #endif
1199 
1200   PetscFunctionBegin;
1201   if (!n) PetscFunctionReturn(0);
1202 
1203   PetscCall(VecGetArrayRead(bb,&b));
1204   PetscCall(VecGetArrayWrite(xx,&x));
1205 
1206 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1207   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1208 #else
1209   /* forward solve the lower triangular */
1210   x[0] = b[0];
1211   for (i=1; i<n; i++) {
1212     ai_i = ai[i];
1213     v    = aa + ai_i;
1214     vi   = aj + ai_i;
1215     nz   = adiag[i] - ai_i;
1216     sum  = b[i];
1217     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1218     x[i] = sum;
1219   }
1220 
1221   /* backward solve the upper triangular */
1222   for (i=n-1; i>=0; i--) {
1223     adiag_i = adiag[i];
1224     v       = aa + adiag_i + 1;
1225     vi      = aj + adiag_i + 1;
1226     nz      = ai[i+1] - adiag_i - 1;
1227     sum     = x[i];
1228     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1229     x[i] = sum*aa[adiag_i];
1230   }
1231 #endif
1232   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
1233   PetscCall(VecRestoreArrayRead(bb,&b));
1234   PetscCall(VecRestoreArrayWrite(xx,&x));
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1239 {
1240   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1241   IS                iscol = a->col,isrow = a->row;
1242   PetscInt          i, n = A->rmap->n,j;
1243   PetscInt          nz;
1244   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1245   PetscScalar       *x,*tmp,sum;
1246   const PetscScalar *b;
1247   const MatScalar   *aa = a->a,*v;
1248 
1249   PetscFunctionBegin;
1250   if (yy != xx) PetscCall(VecCopy(yy,xx));
1251 
1252   PetscCall(VecGetArrayRead(bb,&b));
1253   PetscCall(VecGetArray(xx,&x));
1254   tmp  = a->solve_work;
1255 
1256   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1257   PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1);
1258 
1259   /* forward solve the lower triangular */
1260   tmp[0] = b[*r++];
1261   for (i=1; i<n; i++) {
1262     v   = aa + ai[i];
1263     vi  = aj + ai[i];
1264     nz  = a->diag[i] - ai[i];
1265     sum = b[*r++];
1266     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1267     tmp[i] = sum;
1268   }
1269 
1270   /* backward solve the upper triangular */
1271   for (i=n-1; i>=0; i--) {
1272     v   = aa + a->diag[i] + 1;
1273     vi  = aj + a->diag[i] + 1;
1274     nz  = ai[i+1] - a->diag[i] - 1;
1275     sum = tmp[i];
1276     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1277     tmp[i]   = sum*aa[a->diag[i]];
1278     x[*c--] += tmp[i];
1279   }
1280 
1281   PetscCall(ISRestoreIndices(isrow,&rout));
1282   PetscCall(ISRestoreIndices(iscol,&cout));
1283   PetscCall(VecRestoreArrayRead(bb,&b));
1284   PetscCall(VecRestoreArray(xx,&x));
1285   PetscCall(PetscLogFlops(2.0*a->nz));
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1290 {
1291   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1292   IS                iscol = a->col,isrow = a->row;
1293   PetscInt          i, n = A->rmap->n,j;
1294   PetscInt          nz;
1295   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1296   PetscScalar       *x,*tmp,sum;
1297   const PetscScalar *b;
1298   const MatScalar   *aa = a->a,*v;
1299 
1300   PetscFunctionBegin;
1301   if (yy != xx) PetscCall(VecCopy(yy,xx));
1302 
1303   PetscCall(VecGetArrayRead(bb,&b));
1304   PetscCall(VecGetArray(xx,&x));
1305   tmp  = a->solve_work;
1306 
1307   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1308   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1309 
1310   /* forward solve the lower triangular */
1311   tmp[0] = b[r[0]];
1312   v      = aa;
1313   vi     = aj;
1314   for (i=1; i<n; i++) {
1315     nz  = ai[i+1] - ai[i];
1316     sum = b[r[i]];
1317     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1318     tmp[i] = sum;
1319     v     += nz;
1320     vi    += nz;
1321   }
1322 
1323   /* backward solve the upper triangular */
1324   v  = aa + adiag[n-1];
1325   vi = aj + adiag[n-1];
1326   for (i=n-1; i>=0; i--) {
1327     nz  = adiag[i] - adiag[i+1] - 1;
1328     sum = tmp[i];
1329     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1330     tmp[i]   = sum*v[nz];
1331     x[c[i]] += tmp[i];
1332     v       += nz+1; vi += nz+1;
1333   }
1334 
1335   PetscCall(ISRestoreIndices(isrow,&rout));
1336   PetscCall(ISRestoreIndices(iscol,&cout));
1337   PetscCall(VecRestoreArrayRead(bb,&b));
1338   PetscCall(VecRestoreArray(xx,&x));
1339   PetscCall(PetscLogFlops(2.0*a->nz));
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1344 {
1345   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1346   IS                iscol = a->col,isrow = a->row;
1347   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1348   PetscInt          i,n = A->rmap->n,j;
1349   PetscInt          nz;
1350   PetscScalar       *x,*tmp,s1;
1351   const MatScalar   *aa = a->a,*v;
1352   const PetscScalar *b;
1353 
1354   PetscFunctionBegin;
1355   PetscCall(VecGetArrayRead(bb,&b));
1356   PetscCall(VecGetArrayWrite(xx,&x));
1357   tmp  = a->solve_work;
1358 
1359   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1360   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1361 
1362   /* copy the b into temp work space according to permutation */
1363   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1364 
1365   /* forward solve the U^T */
1366   for (i=0; i<n; i++) {
1367     v   = aa + diag[i];
1368     vi  = aj + diag[i] + 1;
1369     nz  = ai[i+1] - diag[i] - 1;
1370     s1  = tmp[i];
1371     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1372     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1373     tmp[i] = s1;
1374   }
1375 
1376   /* backward solve the L^T */
1377   for (i=n-1; i>=0; i--) {
1378     v  = aa + diag[i] - 1;
1379     vi = aj + diag[i] - 1;
1380     nz = diag[i] - ai[i];
1381     s1 = tmp[i];
1382     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1383   }
1384 
1385   /* copy tmp into x according to permutation */
1386   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1387 
1388   PetscCall(ISRestoreIndices(isrow,&rout));
1389   PetscCall(ISRestoreIndices(iscol,&cout));
1390   PetscCall(VecRestoreArrayRead(bb,&b));
1391   PetscCall(VecRestoreArrayWrite(xx,&x));
1392 
1393   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1398 {
1399   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1400   IS                iscol = a->col,isrow = a->row;
1401   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1402   PetscInt          i,n = A->rmap->n,j;
1403   PetscInt          nz;
1404   PetscScalar       *x,*tmp,s1;
1405   const MatScalar   *aa = a->a,*v;
1406   const PetscScalar *b;
1407 
1408   PetscFunctionBegin;
1409   PetscCall(VecGetArrayRead(bb,&b));
1410   PetscCall(VecGetArrayWrite(xx,&x));
1411   tmp  = a->solve_work;
1412 
1413   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1414   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1415 
1416   /* copy the b into temp work space according to permutation */
1417   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1418 
1419   /* forward solve the U^T */
1420   for (i=0; i<n; i++) {
1421     v   = aa + adiag[i+1] + 1;
1422     vi  = aj + adiag[i+1] + 1;
1423     nz  = adiag[i] - adiag[i+1] - 1;
1424     s1  = tmp[i];
1425     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1426     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1427     tmp[i] = s1;
1428   }
1429 
1430   /* backward solve the L^T */
1431   for (i=n-1; i>=0; i--) {
1432     v  = aa + ai[i];
1433     vi = aj + ai[i];
1434     nz = ai[i+1] - ai[i];
1435     s1 = tmp[i];
1436     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1437   }
1438 
1439   /* copy tmp into x according to permutation */
1440   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1441 
1442   PetscCall(ISRestoreIndices(isrow,&rout));
1443   PetscCall(ISRestoreIndices(iscol,&cout));
1444   PetscCall(VecRestoreArrayRead(bb,&b));
1445   PetscCall(VecRestoreArrayWrite(xx,&x));
1446 
1447   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1452 {
1453   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1454   IS                iscol = a->col,isrow = a->row;
1455   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1456   PetscInt          i,n = A->rmap->n,j;
1457   PetscInt          nz;
1458   PetscScalar       *x,*tmp,s1;
1459   const MatScalar   *aa = a->a,*v;
1460   const PetscScalar *b;
1461 
1462   PetscFunctionBegin;
1463   if (zz != xx) PetscCall(VecCopy(zz,xx));
1464   PetscCall(VecGetArrayRead(bb,&b));
1465   PetscCall(VecGetArray(xx,&x));
1466   tmp  = a->solve_work;
1467 
1468   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1469   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1470 
1471   /* copy the b into temp work space according to permutation */
1472   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1473 
1474   /* forward solve the U^T */
1475   for (i=0; i<n; i++) {
1476     v   = aa + diag[i];
1477     vi  = aj + diag[i] + 1;
1478     nz  = ai[i+1] - diag[i] - 1;
1479     s1  = tmp[i];
1480     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1481     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1482     tmp[i] = s1;
1483   }
1484 
1485   /* backward solve the L^T */
1486   for (i=n-1; i>=0; i--) {
1487     v  = aa + diag[i] - 1;
1488     vi = aj + diag[i] - 1;
1489     nz = diag[i] - ai[i];
1490     s1 = tmp[i];
1491     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1492   }
1493 
1494   /* copy tmp into x according to permutation */
1495   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1496 
1497   PetscCall(ISRestoreIndices(isrow,&rout));
1498   PetscCall(ISRestoreIndices(iscol,&cout));
1499   PetscCall(VecRestoreArrayRead(bb,&b));
1500   PetscCall(VecRestoreArray(xx,&x));
1501 
1502   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1507 {
1508   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1509   IS                iscol = a->col,isrow = a->row;
1510   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1511   PetscInt          i,n = A->rmap->n,j;
1512   PetscInt          nz;
1513   PetscScalar       *x,*tmp,s1;
1514   const MatScalar   *aa = a->a,*v;
1515   const PetscScalar *b;
1516 
1517   PetscFunctionBegin;
1518   if (zz != xx) PetscCall(VecCopy(zz,xx));
1519   PetscCall(VecGetArrayRead(bb,&b));
1520   PetscCall(VecGetArray(xx,&x));
1521   tmp  = a->solve_work;
1522 
1523   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1524   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1525 
1526   /* copy the b into temp work space according to permutation */
1527   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1528 
1529   /* forward solve the U^T */
1530   for (i=0; i<n; i++) {
1531     v   = aa + adiag[i+1] + 1;
1532     vi  = aj + adiag[i+1] + 1;
1533     nz  = adiag[i] - adiag[i+1] - 1;
1534     s1  = tmp[i];
1535     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1536     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1537     tmp[i] = s1;
1538   }
1539 
1540   /* backward solve the L^T */
1541   for (i=n-1; i>=0; i--) {
1542     v  = aa + ai[i];
1543     vi = aj + ai[i];
1544     nz = ai[i+1] - ai[i];
1545     s1 = tmp[i];
1546     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1547   }
1548 
1549   /* copy tmp into x according to permutation */
1550   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1551 
1552   PetscCall(ISRestoreIndices(isrow,&rout));
1553   PetscCall(ISRestoreIndices(iscol,&cout));
1554   PetscCall(VecRestoreArrayRead(bb,&b));
1555   PetscCall(VecRestoreArray(xx,&x));
1556 
1557   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 /* ----------------------------------------------------------------*/
1562 
1563 /*
1564    ilu() under revised new data structure.
1565    Factored arrays bj and ba are stored as
1566      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1567 
1568    bi=fact->i is an array of size n+1, in which
1569    bi+
1570      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1571      bi[n]:  points to L(n-1,n-1)+1
1572 
1573   bdiag=fact->diag is an array of size n+1,in which
1574      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1575      bdiag[n]: points to entry of U(n-1,0)-1
1576 
1577    U(i,:) contains bdiag[i] as its last entry, i.e.,
1578     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1579 */
1580 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1581 {
1582   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1583   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1584   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1585   IS             isicol;
1586 
1587   PetscFunctionBegin;
1588   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
1589   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE));
1590   b    = (Mat_SeqAIJ*)(fact)->data;
1591 
1592   /* allocate matrix arrays for new data structure */
1593   PetscCall(PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i));
1594   PetscCall(PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt)));
1595 
1596   b->singlemalloc = PETSC_TRUE;
1597   if (!b->diag) {
1598     PetscCall(PetscMalloc1(n+1,&b->diag));
1599     PetscCall(PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt)));
1600   }
1601   bdiag = b->diag;
1602 
1603   if (n > 0) {
1604     PetscCall(PetscArrayzero(b->a,ai[n]));
1605   }
1606 
1607   /* set bi and bj with new data structure */
1608   bi = b->i;
1609   bj = b->j;
1610 
1611   /* L part */
1612   bi[0] = 0;
1613   for (i=0; i<n; i++) {
1614     nz      = adiag[i] - ai[i];
1615     bi[i+1] = bi[i] + nz;
1616     aj      = a->j + ai[i];
1617     for (j=0; j<nz; j++) {
1618       /*   *bj = aj[j]; bj++; */
1619       bj[k++] = aj[j];
1620     }
1621   }
1622 
1623   /* U part */
1624   bdiag[n] = bi[n]-1;
1625   for (i=n-1; i>=0; i--) {
1626     nz = ai[i+1] - adiag[i] - 1;
1627     aj = a->j + adiag[i] + 1;
1628     for (j=0; j<nz; j++) {
1629       /*      *bj = aj[j]; bj++; */
1630       bj[k++] = aj[j];
1631     }
1632     /* diag[i] */
1633     /*    *bj = i; bj++; */
1634     bj[k++]  = i;
1635     bdiag[i] = bdiag[i+1] + nz + 1;
1636   }
1637 
1638   fact->factortype             = MAT_FACTOR_ILU;
1639   fact->info.factor_mallocs    = 0;
1640   fact->info.fill_ratio_given  = info->fill;
1641   fact->info.fill_ratio_needed = 1.0;
1642   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1643   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1644 
1645   b       = (Mat_SeqAIJ*)(fact)->data;
1646   b->row  = isrow;
1647   b->col  = iscol;
1648   b->icol = isicol;
1649   PetscCall(PetscMalloc1(fact->rmap->n+1,&b->solve_work));
1650   PetscCall(PetscObjectReference((PetscObject)isrow));
1651   PetscCall(PetscObjectReference((PetscObject)iscol));
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1656 {
1657   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1658   IS                 isicol;
1659   const PetscInt     *r,*ic;
1660   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1661   PetscInt           *bi,*cols,nnz,*cols_lvl;
1662   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1663   PetscInt           i,levels,diagonal_fill;
1664   PetscBool          col_identity,row_identity,missing;
1665   PetscReal          f;
1666   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1667   PetscBT            lnkbt;
1668   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1669   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1670   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1671 
1672   PetscFunctionBegin;
1673   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
1674   PetscCall(MatMissingDiagonal(A,&missing,&i));
1675   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
1676 
1677   levels = (PetscInt)info->levels;
1678   PetscCall(ISIdentity(isrow,&row_identity));
1679   PetscCall(ISIdentity(iscol,&col_identity));
1680   if (!levels && row_identity && col_identity) {
1681     /* special case: ilu(0) with natural ordering */
1682     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info));
1683     if (a->inode.size) {
1684       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1685     }
1686     PetscFunctionReturn(0);
1687   }
1688 
1689   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
1690   PetscCall(ISGetIndices(isrow,&r));
1691   PetscCall(ISGetIndices(isicol,&ic));
1692 
1693   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1694   PetscCall(PetscMalloc1(n+1,&bi));
1695   PetscCall(PetscMalloc1(n+1,&bdiag));
1696   bi[0] = bdiag[0] = 0;
1697   PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr));
1698 
1699   /* create a linked list for storing column indices of the active row */
1700   nlnk = n + 1;
1701   PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt));
1702 
1703   /* initial FreeSpace size is f*(ai[n]+1) */
1704   f                 = info->fill;
1705   diagonal_fill     = (PetscInt)info->diagonal_fill;
1706   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
1707   current_space     = free_space;
1708   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl));
1709   current_space_lvl = free_space_lvl;
1710   for (i=0; i<n; i++) {
1711     nzi = 0;
1712     /* copy current row into linked list */
1713     nnz = ai[r[i]+1] - ai[r[i]];
1714     PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i);
1715     cols   = aj + ai[r[i]];
1716     lnk[i] = -1; /* marker to indicate if diagonal exists */
1717     PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt));
1718     nzi   += nlnk;
1719 
1720     /* make sure diagonal entry is included */
1721     if (diagonal_fill && lnk[i] == -1) {
1722       fm = n;
1723       while (lnk[fm] < i) fm = lnk[fm];
1724       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1725       lnk[fm]    = i;
1726       lnk_lvl[i] = 0;
1727       nzi++; dcount++;
1728     }
1729 
1730     /* add pivot rows into the active row */
1731     nzbd = 0;
1732     prow = lnk[n];
1733     while (prow < i) {
1734       nnz      = bdiag[prow];
1735       cols     = bj_ptr[prow] + nnz + 1;
1736       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1737       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1738       PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow));
1739       nzi     += nlnk;
1740       prow     = lnk[prow];
1741       nzbd++;
1742     }
1743     bdiag[i] = nzbd;
1744     bi[i+1]  = bi[i] + nzi;
1745     /* if free space is not available, make more free space */
1746     if (current_space->local_remaining<nzi) {
1747       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1748       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
1749       PetscCall(PetscFreeSpaceGet(nnz,&current_space_lvl));
1750       reallocs++;
1751     }
1752 
1753     /* copy data into free_space and free_space_lvl, then initialize lnk */
1754     PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
1755     bj_ptr[i]    = current_space->array;
1756     bjlvl_ptr[i] = current_space_lvl->array;
1757 
1758     /* make sure the active row i has diagonal entry */
1759     PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1760 
1761     current_space->array               += nzi;
1762     current_space->local_used          += nzi;
1763     current_space->local_remaining     -= nzi;
1764     current_space_lvl->array           += nzi;
1765     current_space_lvl->local_used      += nzi;
1766     current_space_lvl->local_remaining -= nzi;
1767   }
1768 
1769   PetscCall(ISRestoreIndices(isrow,&r));
1770   PetscCall(ISRestoreIndices(isicol,&ic));
1771   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1772   PetscCall(PetscMalloc1(bi[n]+1,&bj));
1773   PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag));
1774 
1775   PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
1776   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1777   PetscCall(PetscFree2(bj_ptr,bjlvl_ptr));
1778 
1779 #if defined(PETSC_USE_INFO)
1780   {
1781     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1782     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
1783     PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af));
1784     PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af));
1785     PetscCall(PetscInfo(A,"for best performance.\n"));
1786     if (diagonal_fill) {
1787       PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount));
1788     }
1789   }
1790 #endif
1791   /* put together the new matrix */
1792   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL));
1793   PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol));
1794   b    = (Mat_SeqAIJ*)(fact)->data;
1795 
1796   b->free_a       = PETSC_TRUE;
1797   b->free_ij      = PETSC_TRUE;
1798   b->singlemalloc = PETSC_FALSE;
1799 
1800   PetscCall(PetscMalloc1(bdiag[0]+1,&b->a));
1801 
1802   b->j    = bj;
1803   b->i    = bi;
1804   b->diag = bdiag;
1805   b->ilen = NULL;
1806   b->imax = NULL;
1807   b->row  = isrow;
1808   b->col  = iscol;
1809   PetscCall(PetscObjectReference((PetscObject)isrow));
1810   PetscCall(PetscObjectReference((PetscObject)iscol));
1811   b->icol = isicol;
1812 
1813   PetscCall(PetscMalloc1(n+1,&b->solve_work));
1814   /* In b structure:  Free imax, ilen, old a, old j.
1815      Allocate bdiag, solve_work, new a, new j */
1816   PetscCall(PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar))));
1817   b->maxnz = b->nz = bdiag[0]+1;
1818 
1819   (fact)->info.factor_mallocs    = reallocs;
1820   (fact)->info.fill_ratio_given  = f;
1821   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1822   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1823   if (a->inode.size) {
1824     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1825   }
1826   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1831 {
1832   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1833   IS                 isicol;
1834   const PetscInt     *r,*ic;
1835   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1836   PetscInt           *bi,*cols,nnz,*cols_lvl;
1837   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1838   PetscInt           i,levels,diagonal_fill;
1839   PetscBool          col_identity,row_identity;
1840   PetscReal          f;
1841   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1842   PetscBT            lnkbt;
1843   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1844   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1845   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1846   PetscBool          missing;
1847 
1848   PetscFunctionBegin;
1849   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
1850   PetscCall(MatMissingDiagonal(A,&missing,&i));
1851   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
1852 
1853   f             = info->fill;
1854   levels        = (PetscInt)info->levels;
1855   diagonal_fill = (PetscInt)info->diagonal_fill;
1856 
1857   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
1858 
1859   PetscCall(ISIdentity(isrow,&row_identity));
1860   PetscCall(ISIdentity(iscol,&col_identity));
1861   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1862     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE));
1863 
1864     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1865     if (a->inode.size) {
1866       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1867     }
1868     fact->factortype               = MAT_FACTOR_ILU;
1869     (fact)->info.factor_mallocs    = 0;
1870     (fact)->info.fill_ratio_given  = info->fill;
1871     (fact)->info.fill_ratio_needed = 1.0;
1872 
1873     b    = (Mat_SeqAIJ*)(fact)->data;
1874     b->row  = isrow;
1875     b->col  = iscol;
1876     b->icol = isicol;
1877     PetscCall(PetscMalloc1((fact)->rmap->n+1,&b->solve_work));
1878     PetscCall(PetscObjectReference((PetscObject)isrow));
1879     PetscCall(PetscObjectReference((PetscObject)iscol));
1880     PetscFunctionReturn(0);
1881   }
1882 
1883   PetscCall(ISGetIndices(isrow,&r));
1884   PetscCall(ISGetIndices(isicol,&ic));
1885 
1886   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1887   PetscCall(PetscMalloc1(n+1,&bi));
1888   PetscCall(PetscMalloc1(n+1,&bdiag));
1889   bi[0] = bdiag[0] = 0;
1890 
1891   PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr));
1892 
1893   /* create a linked list for storing column indices of the active row */
1894   nlnk = n + 1;
1895   PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt));
1896 
1897   /* initial FreeSpace size is f*(ai[n]+1) */
1898   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
1899   current_space     = free_space;
1900   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl));
1901   current_space_lvl = free_space_lvl;
1902 
1903   for (i=0; i<n; i++) {
1904     nzi = 0;
1905     /* copy current row into linked list */
1906     nnz = ai[r[i]+1] - ai[r[i]];
1907     PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i);
1908     cols   = aj + ai[r[i]];
1909     lnk[i] = -1; /* marker to indicate if diagonal exists */
1910     PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt));
1911     nzi   += nlnk;
1912 
1913     /* make sure diagonal entry is included */
1914     if (diagonal_fill && lnk[i] == -1) {
1915       fm = n;
1916       while (lnk[fm] < i) fm = lnk[fm];
1917       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1918       lnk[fm]    = i;
1919       lnk_lvl[i] = 0;
1920       nzi++; dcount++;
1921     }
1922 
1923     /* add pivot rows into the active row */
1924     nzbd = 0;
1925     prow = lnk[n];
1926     while (prow < i) {
1927       nnz      = bdiag[prow];
1928       cols     = bj_ptr[prow] + nnz + 1;
1929       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1930       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1931       PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow));
1932       nzi     += nlnk;
1933       prow     = lnk[prow];
1934       nzbd++;
1935     }
1936     bdiag[i] = nzbd;
1937     bi[i+1]  = bi[i] + nzi;
1938 
1939     /* if free space is not available, make more free space */
1940     if (current_space->local_remaining<nzi) {
1941       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1942       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
1943       PetscCall(PetscFreeSpaceGet(nnz,&current_space_lvl));
1944       reallocs++;
1945     }
1946 
1947     /* copy data into free_space and free_space_lvl, then initialize lnk */
1948     PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
1949     bj_ptr[i]    = current_space->array;
1950     bjlvl_ptr[i] = current_space_lvl->array;
1951 
1952     /* make sure the active row i has diagonal entry */
1953     PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1954 
1955     current_space->array               += nzi;
1956     current_space->local_used          += nzi;
1957     current_space->local_remaining     -= nzi;
1958     current_space_lvl->array           += nzi;
1959     current_space_lvl->local_used      += nzi;
1960     current_space_lvl->local_remaining -= nzi;
1961   }
1962 
1963   PetscCall(ISRestoreIndices(isrow,&r));
1964   PetscCall(ISRestoreIndices(isicol,&ic));
1965 
1966   /* destroy list of free space and other temporary arrays */
1967   PetscCall(PetscMalloc1(bi[n]+1,&bj));
1968   PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); /* copy free_space -> bj */
1969   PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
1970   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1971   PetscCall(PetscFree2(bj_ptr,bjlvl_ptr));
1972 
1973 #if defined(PETSC_USE_INFO)
1974   {
1975     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1976     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
1977     PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af));
1978     PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af));
1979     PetscCall(PetscInfo(A,"for best performance.\n"));
1980     if (diagonal_fill) {
1981       PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount));
1982     }
1983   }
1984 #endif
1985 
1986   /* put together the new matrix */
1987   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL));
1988   PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol));
1989   b    = (Mat_SeqAIJ*)(fact)->data;
1990 
1991   b->free_a       = PETSC_TRUE;
1992   b->free_ij      = PETSC_TRUE;
1993   b->singlemalloc = PETSC_FALSE;
1994 
1995   PetscCall(PetscMalloc1(bi[n],&b->a));
1996   b->j = bj;
1997   b->i = bi;
1998   for (i=0; i<n; i++) bdiag[i] += bi[i];
1999   b->diag = bdiag;
2000   b->ilen = NULL;
2001   b->imax = NULL;
2002   b->row  = isrow;
2003   b->col  = iscol;
2004   PetscCall(PetscObjectReference((PetscObject)isrow));
2005   PetscCall(PetscObjectReference((PetscObject)iscol));
2006   b->icol = isicol;
2007   PetscCall(PetscMalloc1(n+1,&b->solve_work));
2008   /* In b structure:  Free imax, ilen, old a, old j.
2009      Allocate bdiag, solve_work, new a, new j */
2010   PetscCall(PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))));
2011   b->maxnz = b->nz = bi[n];
2012 
2013   (fact)->info.factor_mallocs    = reallocs;
2014   (fact)->info.fill_ratio_given  = f;
2015   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2016   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2017   if (a->inode.size) {
2018     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2019   }
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2024 {
2025   Mat            C = B;
2026   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2027   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2028   IS             ip=b->row,iip = b->icol;
2029   const PetscInt *rip,*riip;
2030   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2031   PetscInt       *ai=a->i,*aj=a->j;
2032   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2033   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2034   PetscBool      perm_identity;
2035   FactorShiftCtx sctx;
2036   PetscReal      rs;
2037   MatScalar      d,*v;
2038 
2039   PetscFunctionBegin;
2040   /* MatPivotSetUp(): initialize shift context sctx */
2041   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
2042 
2043   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2044     sctx.shift_top = info->zeropivot;
2045     for (i=0; i<mbs; i++) {
2046       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2047       d  = (aa)[a->diag[i]];
2048       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2049       v  = aa+ai[i];
2050       nz = ai[i+1] - ai[i];
2051       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2052       if (rs>sctx.shift_top) sctx.shift_top = rs;
2053     }
2054     sctx.shift_top *= 1.1;
2055     sctx.nshift_max = 5;
2056     sctx.shift_lo   = 0.;
2057     sctx.shift_hi   = 1.;
2058   }
2059 
2060   PetscCall(ISGetIndices(ip,&rip));
2061   PetscCall(ISGetIndices(iip,&riip));
2062 
2063   /* allocate working arrays
2064      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2065      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2066   */
2067   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r));
2068 
2069   do {
2070     sctx.newshift = PETSC_FALSE;
2071 
2072     for (i=0; i<mbs; i++) c2r[i] = mbs;
2073     if (mbs) il[0] = 0;
2074 
2075     for (k = 0; k<mbs; k++) {
2076       /* zero rtmp */
2077       nz    = bi[k+1] - bi[k];
2078       bjtmp = bj + bi[k];
2079       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2080 
2081       /* load in initial unfactored row */
2082       bval = ba + bi[k];
2083       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2084       for (j = jmin; j < jmax; j++) {
2085         col = riip[aj[j]];
2086         if (col >= k) { /* only take upper triangular entry */
2087           rtmp[col] = aa[j];
2088           *bval++   = 0.0; /* for in-place factorization */
2089         }
2090       }
2091       /* shift the diagonal of the matrix: ZeropivotApply() */
2092       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2093 
2094       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2095       dk = rtmp[k];
2096       i  = c2r[k]; /* first row to be added to k_th row  */
2097 
2098       while (i < k) {
2099         nexti = c2r[i]; /* next row to be added to k_th row */
2100 
2101         /* compute multiplier, update diag(k) and U(i,k) */
2102         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2103         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2104         dk     += uikdi*ba[ili]; /* update diag[k] */
2105         ba[ili] = uikdi; /* -U(i,k) */
2106 
2107         /* add multiple of row i to k-th row */
2108         jmin = ili + 1; jmax = bi[i+1];
2109         if (jmin < jmax) {
2110           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2111           /* update il and c2r for row i */
2112           il[i] = jmin;
2113           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2114         }
2115         i = nexti;
2116       }
2117 
2118       /* copy data into U(k,:) */
2119       rs   = 0.0;
2120       jmin = bi[k]; jmax = bi[k+1]-1;
2121       if (jmin < jmax) {
2122         for (j=jmin; j<jmax; j++) {
2123           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2124         }
2125         /* add the k-th row into il and c2r */
2126         il[k] = jmin;
2127         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2128       }
2129 
2130       /* MatPivotCheck() */
2131       sctx.rs = rs;
2132       sctx.pv = dk;
2133       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
2134       if (sctx.newshift) break;
2135       dk = sctx.pv;
2136 
2137       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2138     }
2139   } while (sctx.newshift);
2140 
2141   PetscCall(PetscFree3(rtmp,il,c2r));
2142   PetscCall(ISRestoreIndices(ip,&rip));
2143   PetscCall(ISRestoreIndices(iip,&riip));
2144 
2145   PetscCall(ISIdentity(ip,&perm_identity));
2146   if (perm_identity) {
2147     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2148     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2149     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2150     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2151   } else {
2152     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2153     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2154     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2155     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2156   }
2157 
2158   C->assembled    = PETSC_TRUE;
2159   C->preallocated = PETSC_TRUE;
2160 
2161   PetscCall(PetscLogFlops(C->rmap->n));
2162 
2163   /* MatPivotView() */
2164   if (sctx.nshift) {
2165     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2166       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
2167     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2168       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
2169     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2170       PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount));
2171     }
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2177 {
2178   Mat            C = B;
2179   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2180   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2181   IS             ip=b->row,iip = b->icol;
2182   const PetscInt *rip,*riip;
2183   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2184   PetscInt       *ai=a->i,*aj=a->j;
2185   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2186   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2187   PetscBool      perm_identity;
2188   FactorShiftCtx sctx;
2189   PetscReal      rs;
2190   MatScalar      d,*v;
2191 
2192   PetscFunctionBegin;
2193   /* MatPivotSetUp(): initialize shift context sctx */
2194   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
2195 
2196   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2197     sctx.shift_top = info->zeropivot;
2198     for (i=0; i<mbs; i++) {
2199       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2200       d  = (aa)[a->diag[i]];
2201       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2202       v  = aa+ai[i];
2203       nz = ai[i+1] - ai[i];
2204       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2205       if (rs>sctx.shift_top) sctx.shift_top = rs;
2206     }
2207     sctx.shift_top *= 1.1;
2208     sctx.nshift_max = 5;
2209     sctx.shift_lo   = 0.;
2210     sctx.shift_hi   = 1.;
2211   }
2212 
2213   PetscCall(ISGetIndices(ip,&rip));
2214   PetscCall(ISGetIndices(iip,&riip));
2215 
2216   /* initialization */
2217   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl));
2218 
2219   do {
2220     sctx.newshift = PETSC_FALSE;
2221 
2222     for (i=0; i<mbs; i++) jl[i] = mbs;
2223     il[0] = 0;
2224 
2225     for (k = 0; k<mbs; k++) {
2226       /* zero rtmp */
2227       nz    = bi[k+1] - bi[k];
2228       bjtmp = bj + bi[k];
2229       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2230 
2231       bval = ba + bi[k];
2232       /* initialize k-th row by the perm[k]-th row of A */
2233       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2234       for (j = jmin; j < jmax; j++) {
2235         col = riip[aj[j]];
2236         if (col >= k) { /* only take upper triangular entry */
2237           rtmp[col] = aa[j];
2238           *bval++   = 0.0; /* for in-place factorization */
2239         }
2240       }
2241       /* shift the diagonal of the matrix */
2242       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2243 
2244       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2245       dk = rtmp[k];
2246       i  = jl[k]; /* first row to be added to k_th row  */
2247 
2248       while (i < k) {
2249         nexti = jl[i]; /* next row to be added to k_th row */
2250 
2251         /* compute multiplier, update diag(k) and U(i,k) */
2252         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2253         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2254         dk     += uikdi*ba[ili];
2255         ba[ili] = uikdi; /* -U(i,k) */
2256 
2257         /* add multiple of row i to k-th row */
2258         jmin = ili + 1; jmax = bi[i+1];
2259         if (jmin < jmax) {
2260           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2261           /* update il and jl for row i */
2262           il[i] = jmin;
2263           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2264         }
2265         i = nexti;
2266       }
2267 
2268       /* shift the diagonals when zero pivot is detected */
2269       /* compute rs=sum of abs(off-diagonal) */
2270       rs   = 0.0;
2271       jmin = bi[k]+1;
2272       nz   = bi[k+1] - jmin;
2273       bcol = bj + jmin;
2274       for (j=0; j<nz; j++) {
2275         rs += PetscAbsScalar(rtmp[bcol[j]]);
2276       }
2277 
2278       sctx.rs = rs;
2279       sctx.pv = dk;
2280       PetscCall(MatPivotCheck(B,A,info,&sctx,k));
2281       if (sctx.newshift) break;
2282       dk = sctx.pv;
2283 
2284       /* copy data into U(k,:) */
2285       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2286       jmin      = bi[k]+1; jmax = bi[k+1];
2287       if (jmin < jmax) {
2288         for (j=jmin; j<jmax; j++) {
2289           col = bj[j]; ba[j] = rtmp[col];
2290         }
2291         /* add the k-th row into il and jl */
2292         il[k] = jmin;
2293         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2294       }
2295     }
2296   } while (sctx.newshift);
2297 
2298   PetscCall(PetscFree3(rtmp,il,jl));
2299   PetscCall(ISRestoreIndices(ip,&rip));
2300   PetscCall(ISRestoreIndices(iip,&riip));
2301 
2302   PetscCall(ISIdentity(ip,&perm_identity));
2303   if (perm_identity) {
2304     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2305     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2306     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2307     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2308   } else {
2309     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2310     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2311     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2312     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2313   }
2314 
2315   C->assembled    = PETSC_TRUE;
2316   C->preallocated = PETSC_TRUE;
2317 
2318   PetscCall(PetscLogFlops(C->rmap->n));
2319   if (sctx.nshift) {
2320     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2321       PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
2322     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2323       PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
2324     }
2325   }
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 /*
2330    icc() under revised new data structure.
2331    Factored arrays bj and ba are stored as
2332      U(0,:),...,U(i,:),U(n-1,:)
2333 
2334    ui=fact->i is an array of size n+1, in which
2335    ui+
2336      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2337      ui[n]:  points to U(n-1,n-1)+1
2338 
2339   udiag=fact->diag is an array of size n,in which
2340      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2341 
2342    U(i,:) contains udiag[i] as its last entry, i.e.,
2343     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2344 */
2345 
2346 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2347 {
2348   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2349   Mat_SeqSBAIJ       *b;
2350   PetscBool          perm_identity,missing;
2351   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2352   const PetscInt     *rip,*riip;
2353   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2354   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2355   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2356   PetscReal          fill          =info->fill,levels=info->levels;
2357   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2358   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2359   PetscBT            lnkbt;
2360   IS                 iperm;
2361 
2362   PetscFunctionBegin;
2363   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2364   PetscCall(MatMissingDiagonal(A,&missing,&d));
2365   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d);
2366   PetscCall(ISIdentity(perm,&perm_identity));
2367   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2368 
2369   PetscCall(PetscMalloc1(am+1,&ui));
2370   PetscCall(PetscMalloc1(am+1,&udiag));
2371   ui[0] = 0;
2372 
2373   /* ICC(0) without matrix ordering: simply rearrange column indices */
2374   if (!levels && perm_identity) {
2375     for (i=0; i<am; i++) {
2376       ncols    = ai[i+1] - a->diag[i];
2377       ui[i+1]  = ui[i] + ncols;
2378       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2379     }
2380     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2381     cols = uj;
2382     for (i=0; i<am; i++) {
2383       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2384       ncols = ai[i+1] - a->diag[i] -1;
2385       for (j=0; j<ncols; j++) *cols++ = aj[j];
2386       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2387     }
2388   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2389     PetscCall(ISGetIndices(iperm,&riip));
2390     PetscCall(ISGetIndices(perm,&rip));
2391 
2392     /* initialization */
2393     PetscCall(PetscMalloc1(am+1,&ajtmp));
2394 
2395     /* jl: linked list for storing indices of the pivot rows
2396        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2397     PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il));
2398     for (i=0; i<am; i++) {
2399       jl[i] = am; il[i] = 0;
2400     }
2401 
2402     /* create and initialize a linked list for storing column indices of the active row k */
2403     nlnk = am + 1;
2404     PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt));
2405 
2406     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2407     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space));
2408     current_space     = free_space;
2409     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl));
2410     current_space_lvl = free_space_lvl;
2411 
2412     for (k=0; k<am; k++) {  /* for each active row k */
2413       /* initialize lnk by the column indices of row rip[k] of A */
2414       nzk   = 0;
2415       ncols = ai[rip[k]+1] - ai[rip[k]];
2416       PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2417       ncols_upper = 0;
2418       for (j=0; j<ncols; j++) {
2419         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2420         if (riip[i] >= k) { /* only take upper triangular entry */
2421           ajtmp[ncols_upper] = i;
2422           ncols_upper++;
2423         }
2424       }
2425       PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt));
2426       nzk += nlnk;
2427 
2428       /* update lnk by computing fill-in for each pivot row to be merged in */
2429       prow = jl[k]; /* 1st pivot row */
2430 
2431       while (prow < k) {
2432         nextprow = jl[prow];
2433 
2434         /* merge prow into k-th row */
2435         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2436         jmax  = ui[prow+1];
2437         ncols = jmax-jmin;
2438         i     = jmin - ui[prow];
2439         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2440         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2441         j     = *(uj - 1);
2442         PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j));
2443         nzk  += nlnk;
2444 
2445         /* update il and jl for prow */
2446         if (jmin < jmax) {
2447           il[prow] = jmin;
2448           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2449         }
2450         prow = nextprow;
2451       }
2452 
2453       /* if free space is not available, make more free space */
2454       if (current_space->local_remaining<nzk) {
2455         i    = am - k + 1; /* num of unfactored rows */
2456         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2457         PetscCall(PetscFreeSpaceGet(i,&current_space));
2458         PetscCall(PetscFreeSpaceGet(i,&current_space_lvl));
2459         reallocs++;
2460       }
2461 
2462       /* copy data into free_space and free_space_lvl, then initialize lnk */
2463       PetscCheck(nzk != 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k);
2464       PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
2465 
2466       /* add the k-th row into il and jl */
2467       if (nzk > 1) {
2468         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2469         jl[k] = jl[i]; jl[i] = k;
2470         il[k] = ui[k] + 1;
2471       }
2472       uj_ptr[k]     = current_space->array;
2473       uj_lvl_ptr[k] = current_space_lvl->array;
2474 
2475       current_space->array           += nzk;
2476       current_space->local_used      += nzk;
2477       current_space->local_remaining -= nzk;
2478 
2479       current_space_lvl->array           += nzk;
2480       current_space_lvl->local_used      += nzk;
2481       current_space_lvl->local_remaining -= nzk;
2482 
2483       ui[k+1] = ui[k] + nzk;
2484     }
2485 
2486     PetscCall(ISRestoreIndices(perm,&rip));
2487     PetscCall(ISRestoreIndices(iperm,&riip));
2488     PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il));
2489     PetscCall(PetscFree(ajtmp));
2490 
2491     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2492     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2493     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor  */
2494     PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
2495     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2496 
2497   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2498 
2499   /* put together the new matrix in MATSEQSBAIJ format */
2500   b               = (Mat_SeqSBAIJ*)(fact)->data;
2501   b->singlemalloc = PETSC_FALSE;
2502 
2503   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
2504 
2505   b->j             = uj;
2506   b->i             = ui;
2507   b->diag          = udiag;
2508   b->free_diag     = PETSC_TRUE;
2509   b->ilen          = NULL;
2510   b->imax          = NULL;
2511   b->row           = perm;
2512   b->col           = perm;
2513   PetscCall(PetscObjectReference((PetscObject)perm));
2514   PetscCall(PetscObjectReference((PetscObject)perm));
2515   b->icol          = iperm;
2516   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2517 
2518   PetscCall(PetscMalloc1(am+1,&b->solve_work));
2519   PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar))));
2520 
2521   b->maxnz   = b->nz = ui[am];
2522   b->free_a  = PETSC_TRUE;
2523   b->free_ij = PETSC_TRUE;
2524 
2525   fact->info.factor_mallocs   = reallocs;
2526   fact->info.fill_ratio_given = fill;
2527   if (ai[am] != 0) {
2528     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2529     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2530   } else {
2531     fact->info.fill_ratio_needed = 0.0;
2532   }
2533 #if defined(PETSC_USE_INFO)
2534   if (ai[am] != 0) {
2535     PetscReal af = fact->info.fill_ratio_needed;
2536     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
2537     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
2538     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
2539   } else {
2540     PetscCall(PetscInfo(A,"Empty matrix\n"));
2541   }
2542 #endif
2543   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2548 {
2549   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2550   Mat_SeqSBAIJ       *b;
2551   PetscBool          perm_identity,missing;
2552   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2553   const PetscInt     *rip,*riip;
2554   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2555   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2556   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2557   PetscReal          fill          =info->fill,levels=info->levels;
2558   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2559   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2560   PetscBT            lnkbt;
2561   IS                 iperm;
2562 
2563   PetscFunctionBegin;
2564   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2565   PetscCall(MatMissingDiagonal(A,&missing,&d));
2566   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d);
2567   PetscCall(ISIdentity(perm,&perm_identity));
2568   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2569 
2570   PetscCall(PetscMalloc1(am+1,&ui));
2571   PetscCall(PetscMalloc1(am+1,&udiag));
2572   ui[0] = 0;
2573 
2574   /* ICC(0) without matrix ordering: simply copies fill pattern */
2575   if (!levels && perm_identity) {
2576 
2577     for (i=0; i<am; i++) {
2578       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2579       udiag[i] = ui[i];
2580     }
2581     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2582     cols = uj;
2583     for (i=0; i<am; i++) {
2584       aj    = a->j + a->diag[i];
2585       ncols = ui[i+1] - ui[i];
2586       for (j=0; j<ncols; j++) *cols++ = *aj++;
2587     }
2588   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2589     PetscCall(ISGetIndices(iperm,&riip));
2590     PetscCall(ISGetIndices(perm,&rip));
2591 
2592     /* initialization */
2593     PetscCall(PetscMalloc1(am+1,&ajtmp));
2594 
2595     /* jl: linked list for storing indices of the pivot rows
2596        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2597     PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il));
2598     for (i=0; i<am; i++) {
2599       jl[i] = am; il[i] = 0;
2600     }
2601 
2602     /* create and initialize a linked list for storing column indices of the active row k */
2603     nlnk = am + 1;
2604     PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt));
2605 
2606     /* initial FreeSpace size is fill*(ai[am]+1) */
2607     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space));
2608     current_space     = free_space;
2609     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl));
2610     current_space_lvl = free_space_lvl;
2611 
2612     for (k=0; k<am; k++) {  /* for each active row k */
2613       /* initialize lnk by the column indices of row rip[k] of A */
2614       nzk   = 0;
2615       ncols = ai[rip[k]+1] - ai[rip[k]];
2616       PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2617       ncols_upper = 0;
2618       for (j=0; j<ncols; j++) {
2619         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2620         if (riip[i] >= k) { /* only take upper triangular entry */
2621           ajtmp[ncols_upper] = i;
2622           ncols_upper++;
2623         }
2624       }
2625       PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt));
2626       nzk += nlnk;
2627 
2628       /* update lnk by computing fill-in for each pivot row to be merged in */
2629       prow = jl[k]; /* 1st pivot row */
2630 
2631       while (prow < k) {
2632         nextprow = jl[prow];
2633 
2634         /* merge prow into k-th row */
2635         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2636         jmax  = ui[prow+1];
2637         ncols = jmax-jmin;
2638         i     = jmin - ui[prow];
2639         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2640         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2641         j     = *(uj - 1);
2642         PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j));
2643         nzk  += nlnk;
2644 
2645         /* update il and jl for prow */
2646         if (jmin < jmax) {
2647           il[prow] = jmin;
2648           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2649         }
2650         prow = nextprow;
2651       }
2652 
2653       /* if free space is not available, make more free space */
2654       if (current_space->local_remaining<nzk) {
2655         i    = am - k + 1; /* num of unfactored rows */
2656         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2657         PetscCall(PetscFreeSpaceGet(i,&current_space));
2658         PetscCall(PetscFreeSpaceGet(i,&current_space_lvl));
2659         reallocs++;
2660       }
2661 
2662       /* copy data into free_space and free_space_lvl, then initialize lnk */
2663       PetscCheck(nzk,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k);
2664       PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
2665 
2666       /* add the k-th row into il and jl */
2667       if (nzk > 1) {
2668         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2669         jl[k] = jl[i]; jl[i] = k;
2670         il[k] = ui[k] + 1;
2671       }
2672       uj_ptr[k]     = current_space->array;
2673       uj_lvl_ptr[k] = current_space_lvl->array;
2674 
2675       current_space->array           += nzk;
2676       current_space->local_used      += nzk;
2677       current_space->local_remaining -= nzk;
2678 
2679       current_space_lvl->array           += nzk;
2680       current_space_lvl->local_used      += nzk;
2681       current_space_lvl->local_remaining -= nzk;
2682 
2683       ui[k+1] = ui[k] + nzk;
2684     }
2685 
2686 #if defined(PETSC_USE_INFO)
2687     if (ai[am] != 0) {
2688       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2689       PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
2690       PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
2691       PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
2692     } else {
2693       PetscCall(PetscInfo(A,"Empty matrix\n"));
2694     }
2695 #endif
2696 
2697     PetscCall(ISRestoreIndices(perm,&rip));
2698     PetscCall(ISRestoreIndices(iperm,&riip));
2699     PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il));
2700     PetscCall(PetscFree(ajtmp));
2701 
2702     /* destroy list of free space and other temporary array(s) */
2703     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2704     PetscCall(PetscFreeSpaceContiguous(&free_space,uj));
2705     PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
2706     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2707 
2708   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2709 
2710   /* put together the new matrix in MATSEQSBAIJ format */
2711 
2712   b               = (Mat_SeqSBAIJ*)fact->data;
2713   b->singlemalloc = PETSC_FALSE;
2714 
2715   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
2716 
2717   b->j         = uj;
2718   b->i         = ui;
2719   b->diag      = udiag;
2720   b->free_diag = PETSC_TRUE;
2721   b->ilen      = NULL;
2722   b->imax      = NULL;
2723   b->row       = perm;
2724   b->col       = perm;
2725 
2726   PetscCall(PetscObjectReference((PetscObject)perm));
2727   PetscCall(PetscObjectReference((PetscObject)perm));
2728 
2729   b->icol          = iperm;
2730   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2731   PetscCall(PetscMalloc1(am+1,&b->solve_work));
2732   PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))));
2733   b->maxnz         = b->nz = ui[am];
2734   b->free_a        = PETSC_TRUE;
2735   b->free_ij       = PETSC_TRUE;
2736 
2737   fact->info.factor_mallocs   = reallocs;
2738   fact->info.fill_ratio_given = fill;
2739   if (ai[am] != 0) {
2740     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2741   } else {
2742     fact->info.fill_ratio_needed = 0.0;
2743   }
2744   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2749 {
2750   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2751   Mat_SeqSBAIJ       *b;
2752   PetscBool          perm_identity,missing;
2753   PetscReal          fill = info->fill;
2754   const PetscInt     *rip,*riip;
2755   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2756   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2757   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2758   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2759   PetscBT            lnkbt;
2760   IS                 iperm;
2761 
2762   PetscFunctionBegin;
2763   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2764   PetscCall(MatMissingDiagonal(A,&missing,&i));
2765   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
2766 
2767   /* check whether perm is the identity mapping */
2768   PetscCall(ISIdentity(perm,&perm_identity));
2769   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2770   PetscCall(ISGetIndices(iperm,&riip));
2771   PetscCall(ISGetIndices(perm,&rip));
2772 
2773   /* initialization */
2774   PetscCall(PetscMalloc1(am+1,&ui));
2775   PetscCall(PetscMalloc1(am+1,&udiag));
2776   ui[0] = 0;
2777 
2778   /* jl: linked list for storing indices of the pivot rows
2779      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2780   PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols));
2781   for (i=0; i<am; i++) {
2782     jl[i] = am; il[i] = 0;
2783   }
2784 
2785   /* create and initialize a linked list for storing column indices of the active row k */
2786   nlnk = am + 1;
2787   PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt));
2788 
2789   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2790   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space));
2791   current_space = free_space;
2792 
2793   for (k=0; k<am; k++) {  /* for each active row k */
2794     /* initialize lnk by the column indices of row rip[k] of A */
2795     nzk   = 0;
2796     ncols = ai[rip[k]+1] - ai[rip[k]];
2797     PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2798     ncols_upper = 0;
2799     for (j=0; j<ncols; j++) {
2800       i = riip[*(aj + ai[rip[k]] + j)];
2801       if (i >= k) { /* only take upper triangular entry */
2802         cols[ncols_upper] = i;
2803         ncols_upper++;
2804       }
2805     }
2806     PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt));
2807     nzk += nlnk;
2808 
2809     /* update lnk by computing fill-in for each pivot row to be merged in */
2810     prow = jl[k]; /* 1st pivot row */
2811 
2812     while (prow < k) {
2813       nextprow = jl[prow];
2814       /* merge prow into k-th row */
2815       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2816       jmax   = ui[prow+1];
2817       ncols  = jmax-jmin;
2818       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2819       PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt));
2820       nzk   += nlnk;
2821 
2822       /* update il and jl for prow */
2823       if (jmin < jmax) {
2824         il[prow] = jmin;
2825         j        = *uj_ptr;
2826         jl[prow] = jl[j];
2827         jl[j]    = prow;
2828       }
2829       prow = nextprow;
2830     }
2831 
2832     /* if free space is not available, make more free space */
2833     if (current_space->local_remaining<nzk) {
2834       i    = am - k + 1; /* num of unfactored rows */
2835       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2836       PetscCall(PetscFreeSpaceGet(i,&current_space));
2837       reallocs++;
2838     }
2839 
2840     /* copy data into free space, then initialize lnk */
2841     PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt));
2842 
2843     /* add the k-th row into il and jl */
2844     if (nzk > 1) {
2845       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2846       jl[k] = jl[i]; jl[i] = k;
2847       il[k] = ui[k] + 1;
2848     }
2849     ui_ptr[k] = current_space->array;
2850 
2851     current_space->array           += nzk;
2852     current_space->local_used      += nzk;
2853     current_space->local_remaining -= nzk;
2854 
2855     ui[k+1] = ui[k] + nzk;
2856   }
2857 
2858   PetscCall(ISRestoreIndices(perm,&rip));
2859   PetscCall(ISRestoreIndices(iperm,&riip));
2860   PetscCall(PetscFree4(ui_ptr,jl,il,cols));
2861 
2862   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2863   PetscCall(PetscMalloc1(ui[am]+1,&uj));
2864   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor */
2865   PetscCall(PetscLLDestroy(lnk,lnkbt));
2866 
2867   /* put together the new matrix in MATSEQSBAIJ format */
2868 
2869   b               = (Mat_SeqSBAIJ*)fact->data;
2870   b->singlemalloc = PETSC_FALSE;
2871   b->free_a       = PETSC_TRUE;
2872   b->free_ij      = PETSC_TRUE;
2873 
2874   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
2875 
2876   b->j         = uj;
2877   b->i         = ui;
2878   b->diag      = udiag;
2879   b->free_diag = PETSC_TRUE;
2880   b->ilen      = NULL;
2881   b->imax      = NULL;
2882   b->row       = perm;
2883   b->col       = perm;
2884 
2885   PetscCall(PetscObjectReference((PetscObject)perm));
2886   PetscCall(PetscObjectReference((PetscObject)perm));
2887 
2888   b->icol          = iperm;
2889   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2890 
2891   PetscCall(PetscMalloc1(am+1,&b->solve_work));
2892   PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar))));
2893 
2894   b->maxnz = b->nz = ui[am];
2895 
2896   fact->info.factor_mallocs   = reallocs;
2897   fact->info.fill_ratio_given = fill;
2898   if (ai[am] != 0) {
2899     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2900     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2901   } else {
2902     fact->info.fill_ratio_needed = 0.0;
2903   }
2904 #if defined(PETSC_USE_INFO)
2905   if (ai[am] != 0) {
2906     PetscReal af = fact->info.fill_ratio_needed;
2907     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
2908     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
2909     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
2910   } else {
2911     PetscCall(PetscInfo(A,"Empty matrix\n"));
2912   }
2913 #endif
2914   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2915   PetscFunctionReturn(0);
2916 }
2917 
2918 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2919 {
2920   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2921   Mat_SeqSBAIJ       *b;
2922   PetscBool          perm_identity,missing;
2923   PetscReal          fill = info->fill;
2924   const PetscInt     *rip,*riip;
2925   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2926   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2927   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2928   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2929   PetscBT            lnkbt;
2930   IS                 iperm;
2931 
2932   PetscFunctionBegin;
2933   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2934   PetscCall(MatMissingDiagonal(A,&missing,&i));
2935   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
2936 
2937   /* check whether perm is the identity mapping */
2938   PetscCall(ISIdentity(perm,&perm_identity));
2939   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2940   PetscCall(ISGetIndices(iperm,&riip));
2941   PetscCall(ISGetIndices(perm,&rip));
2942 
2943   /* initialization */
2944   PetscCall(PetscMalloc1(am+1,&ui));
2945   ui[0] = 0;
2946 
2947   /* jl: linked list for storing indices of the pivot rows
2948      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2949   PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols));
2950   for (i=0; i<am; i++) {
2951     jl[i] = am; il[i] = 0;
2952   }
2953 
2954   /* create and initialize a linked list for storing column indices of the active row k */
2955   nlnk = am + 1;
2956   PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt));
2957 
2958   /* initial FreeSpace size is fill*(ai[am]+1) */
2959   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space));
2960   current_space = free_space;
2961 
2962   for (k=0; k<am; k++) {  /* for each active row k */
2963     /* initialize lnk by the column indices of row rip[k] of A */
2964     nzk   = 0;
2965     ncols = ai[rip[k]+1] - ai[rip[k]];
2966     PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2967     ncols_upper = 0;
2968     for (j=0; j<ncols; j++) {
2969       i = riip[*(aj + ai[rip[k]] + j)];
2970       if (i >= k) { /* only take upper triangular entry */
2971         cols[ncols_upper] = i;
2972         ncols_upper++;
2973       }
2974     }
2975     PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt));
2976     nzk += nlnk;
2977 
2978     /* update lnk by computing fill-in for each pivot row to be merged in */
2979     prow = jl[k]; /* 1st pivot row */
2980 
2981     while (prow < k) {
2982       nextprow = jl[prow];
2983       /* merge prow into k-th row */
2984       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2985       jmax   = ui[prow+1];
2986       ncols  = jmax-jmin;
2987       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2988       PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt));
2989       nzk   += nlnk;
2990 
2991       /* update il and jl for prow */
2992       if (jmin < jmax) {
2993         il[prow] = jmin;
2994         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2995       }
2996       prow = nextprow;
2997     }
2998 
2999     /* if free space is not available, make more free space */
3000     if (current_space->local_remaining<nzk) {
3001       i    = am - k + 1; /* num of unfactored rows */
3002       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3003       PetscCall(PetscFreeSpaceGet(i,&current_space));
3004       reallocs++;
3005     }
3006 
3007     /* copy data into free space, then initialize lnk */
3008     PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt));
3009 
3010     /* add the k-th row into il and jl */
3011     if (nzk-1 > 0) {
3012       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3013       jl[k] = jl[i]; jl[i] = k;
3014       il[k] = ui[k] + 1;
3015     }
3016     ui_ptr[k] = current_space->array;
3017 
3018     current_space->array           += nzk;
3019     current_space->local_used      += nzk;
3020     current_space->local_remaining -= nzk;
3021 
3022     ui[k+1] = ui[k] + nzk;
3023   }
3024 
3025 #if defined(PETSC_USE_INFO)
3026   if (ai[am] != 0) {
3027     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3028     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
3029     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
3030     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
3031   } else {
3032     PetscCall(PetscInfo(A,"Empty matrix\n"));
3033   }
3034 #endif
3035 
3036   PetscCall(ISRestoreIndices(perm,&rip));
3037   PetscCall(ISRestoreIndices(iperm,&riip));
3038   PetscCall(PetscFree4(ui_ptr,jl,il,cols));
3039 
3040   /* destroy list of free space and other temporary array(s) */
3041   PetscCall(PetscMalloc1(ui[am]+1,&uj));
3042   PetscCall(PetscFreeSpaceContiguous(&free_space,uj));
3043   PetscCall(PetscLLDestroy(lnk,lnkbt));
3044 
3045   /* put together the new matrix in MATSEQSBAIJ format */
3046 
3047   b               = (Mat_SeqSBAIJ*)fact->data;
3048   b->singlemalloc = PETSC_FALSE;
3049   b->free_a       = PETSC_TRUE;
3050   b->free_ij      = PETSC_TRUE;
3051 
3052   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
3053 
3054   b->j    = uj;
3055   b->i    = ui;
3056   b->diag = NULL;
3057   b->ilen = NULL;
3058   b->imax = NULL;
3059   b->row  = perm;
3060   b->col  = perm;
3061 
3062   PetscCall(PetscObjectReference((PetscObject)perm));
3063   PetscCall(PetscObjectReference((PetscObject)perm));
3064 
3065   b->icol          = iperm;
3066   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3067 
3068   PetscCall(PetscMalloc1(am+1,&b->solve_work));
3069   PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))));
3070   b->maxnz = b->nz = ui[am];
3071 
3072   fact->info.factor_mallocs   = reallocs;
3073   fact->info.fill_ratio_given = fill;
3074   if (ai[am] != 0) {
3075     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3076   } else {
3077     fact->info.fill_ratio_needed = 0.0;
3078   }
3079   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3080   PetscFunctionReturn(0);
3081 }
3082 
3083 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3084 {
3085   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3086   PetscInt          n   = A->rmap->n;
3087   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3088   PetscScalar       *x,sum;
3089   const PetscScalar *b;
3090   const MatScalar   *aa = a->a,*v;
3091   PetscInt          i,nz;
3092 
3093   PetscFunctionBegin;
3094   if (!n) PetscFunctionReturn(0);
3095 
3096   PetscCall(VecGetArrayRead(bb,&b));
3097   PetscCall(VecGetArrayWrite(xx,&x));
3098 
3099   /* forward solve the lower triangular */
3100   x[0] = b[0];
3101   v    = aa;
3102   vi   = aj;
3103   for (i=1; i<n; i++) {
3104     nz  = ai[i+1] - ai[i];
3105     sum = b[i];
3106     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3107     v   += nz;
3108     vi  += nz;
3109     x[i] = sum;
3110   }
3111 
3112   /* backward solve the upper triangular */
3113   for (i=n-1; i>=0; i--) {
3114     v   = aa + adiag[i+1] + 1;
3115     vi  = aj + adiag[i+1] + 1;
3116     nz  = adiag[i] - adiag[i+1]-1;
3117     sum = x[i];
3118     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3119     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3120   }
3121 
3122   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
3123   PetscCall(VecRestoreArrayRead(bb,&b));
3124   PetscCall(VecRestoreArrayWrite(xx,&x));
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3129 {
3130   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3131   IS                iscol = a->col,isrow = a->row;
3132   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3133   const PetscInt    *rout,*cout,*r,*c;
3134   PetscScalar       *x,*tmp,sum;
3135   const PetscScalar *b;
3136   const MatScalar   *aa = a->a,*v;
3137 
3138   PetscFunctionBegin;
3139   if (!n) PetscFunctionReturn(0);
3140 
3141   PetscCall(VecGetArrayRead(bb,&b));
3142   PetscCall(VecGetArrayWrite(xx,&x));
3143   tmp  = a->solve_work;
3144 
3145   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
3146   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
3147 
3148   /* forward solve the lower triangular */
3149   tmp[0] = b[r[0]];
3150   v      = aa;
3151   vi     = aj;
3152   for (i=1; i<n; i++) {
3153     nz  = ai[i+1] - ai[i];
3154     sum = b[r[i]];
3155     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3156     tmp[i] = sum;
3157     v     += nz; vi += nz;
3158   }
3159 
3160   /* backward solve the upper triangular */
3161   for (i=n-1; i>=0; i--) {
3162     v   = aa + adiag[i+1]+1;
3163     vi  = aj + adiag[i+1]+1;
3164     nz  = adiag[i]-adiag[i+1]-1;
3165     sum = tmp[i];
3166     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3167     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3168   }
3169 
3170   PetscCall(ISRestoreIndices(isrow,&rout));
3171   PetscCall(ISRestoreIndices(iscol,&cout));
3172   PetscCall(VecRestoreArrayRead(bb,&b));
3173   PetscCall(VecRestoreArrayWrite(xx,&x));
3174   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
3175   PetscFunctionReturn(0);
3176 }
3177 
3178 /*
3179     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3180 */
3181 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3182 {
3183   Mat            B = *fact;
3184   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3185   IS             isicol;
3186   const PetscInt *r,*ic;
3187   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3188   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3189   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3190   PetscInt       nlnk,*lnk;
3191   PetscBT        lnkbt;
3192   PetscBool      row_identity,icol_identity;
3193   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3194   const PetscInt *ics;
3195   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3196   PetscReal      dt     =info->dt,shift=info->shiftamount;
3197   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3198   PetscBool      missing;
3199 
3200   PetscFunctionBegin;
3201   if (dt      == PETSC_DEFAULT) dt = 0.005;
3202   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3203 
3204   /* ------- symbolic factorization, can be reused ---------*/
3205   PetscCall(MatMissingDiagonal(A,&missing,&i));
3206   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
3207   adiag=a->diag;
3208 
3209   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
3210 
3211   /* bdiag is location of diagonal in factor */
3212   PetscCall(PetscMalloc1(n+1,&bdiag));     /* becomes b->diag */
3213   PetscCall(PetscMalloc1(n+1,&bdiag_rev)); /* temporary */
3214 
3215   /* allocate row pointers bi */
3216   PetscCall(PetscMalloc1(2*n+2,&bi));
3217 
3218   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3219   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3220   nnz_max = ai[n]+2*n*dtcount+2;
3221 
3222   PetscCall(PetscMalloc1(nnz_max+1,&bj));
3223   PetscCall(PetscMalloc1(nnz_max+1,&ba));
3224 
3225   /* put together the new matrix */
3226   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL));
3227   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
3228   b    = (Mat_SeqAIJ*)B->data;
3229 
3230   b->free_a       = PETSC_TRUE;
3231   b->free_ij      = PETSC_TRUE;
3232   b->singlemalloc = PETSC_FALSE;
3233 
3234   b->a    = ba;
3235   b->j    = bj;
3236   b->i    = bi;
3237   b->diag = bdiag;
3238   b->ilen = NULL;
3239   b->imax = NULL;
3240   b->row  = isrow;
3241   b->col  = iscol;
3242   PetscCall(PetscObjectReference((PetscObject)isrow));
3243   PetscCall(PetscObjectReference((PetscObject)iscol));
3244   b->icol = isicol;
3245 
3246   PetscCall(PetscMalloc1(n+1,&b->solve_work));
3247   PetscCall(PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar))));
3248   b->maxnz = nnz_max;
3249 
3250   B->factortype            = MAT_FACTOR_ILUDT;
3251   B->info.factor_mallocs   = 0;
3252   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3253   /* ------- end of symbolic factorization ---------*/
3254 
3255   PetscCall(ISGetIndices(isrow,&r));
3256   PetscCall(ISGetIndices(isicol,&ic));
3257   ics  = ic;
3258 
3259   /* linked list for storing column indices of the active row */
3260   nlnk = n + 1;
3261   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
3262 
3263   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3264   PetscCall(PetscMalloc2(n,&im,n,&jtmp));
3265   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3266   PetscCall(PetscMalloc2(n,&rtmp,n,&vtmp));
3267   PetscCall(PetscArrayzero(rtmp,n));
3268 
3269   bi[0]        = 0;
3270   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3271   bdiag_rev[n] = bdiag[0];
3272   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3273   for (i=0; i<n; i++) {
3274     /* copy initial fill into linked list */
3275     nzi = ai[r[i]+1] - ai[r[i]];
3276     PetscCheck(nzi,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i);
3277     nzi_al = adiag[r[i]] - ai[r[i]];
3278     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3279     ajtmp  = aj + ai[r[i]];
3280     PetscCall(PetscLLAddPerm(nzi,ajtmp,ic,n,&nlnk,lnk,lnkbt));
3281 
3282     /* load in initial (unfactored row) */
3283     aatmp = a->a + ai[r[i]];
3284     for (j=0; j<nzi; j++) {
3285       rtmp[ics[*ajtmp++]] = *aatmp++;
3286     }
3287 
3288     /* add pivot rows into linked list */
3289     row = lnk[n];
3290     while (row < i) {
3291       nzi_bl = bi[row+1] - bi[row] + 1;
3292       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3293       PetscCall(PetscLLAddSortedLU(bjtmp,row,&nlnk,lnk,lnkbt,i,nzi_bl,im));
3294       nzi   += nlnk;
3295       row    = lnk[row];
3296     }
3297 
3298     /* copy data from lnk into jtmp, then initialize lnk */
3299     PetscCall(PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt));
3300 
3301     /* numerical factorization */
3302     bjtmp = jtmp;
3303     row   = *bjtmp++; /* 1st pivot row */
3304     while (row < i) {
3305       pc         = rtmp + row;
3306       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3307       multiplier = (*pc) * (*pv);
3308       *pc        = multiplier;
3309       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3310         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3311         pv = ba + bdiag[row+1] + 1;
3312         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3313         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3314         PetscCall(PetscLogFlops(1+2.0*nz));
3315       }
3316       row = *bjtmp++;
3317     }
3318 
3319     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3320     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3321     nzi_bl   = 0; j = 0;
3322     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3323       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3324       nzi_bl++; j++;
3325     }
3326     nzi_bu = nzi - nzi_bl -1;
3327     while (j < nzi) {
3328       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3329       j++;
3330     }
3331 
3332     bjtmp = bj + bi[i];
3333     batmp = ba + bi[i];
3334     /* apply level dropping rule to L part */
3335     ncut = nzi_al + dtcount;
3336     if (ncut < nzi_bl) {
3337       PetscCall(PetscSortSplit(ncut,nzi_bl,vtmp,jtmp));
3338       PetscCall(PetscSortIntWithScalarArray(ncut,jtmp,vtmp));
3339     } else {
3340       ncut = nzi_bl;
3341     }
3342     for (j=0; j<ncut; j++) {
3343       bjtmp[j] = jtmp[j];
3344       batmp[j] = vtmp[j];
3345     }
3346     bi[i+1] = bi[i] + ncut;
3347     nzi     = ncut + 1;
3348 
3349     /* apply level dropping rule to U part */
3350     ncut = nzi_au + dtcount;
3351     if (ncut < nzi_bu) {
3352       PetscCall(PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1));
3353       PetscCall(PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1));
3354     } else {
3355       ncut = nzi_bu;
3356     }
3357     nzi += ncut;
3358 
3359     /* mark bdiagonal */
3360     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3361     bdiag_rev[n-i-1] = bdiag[i+1];
3362     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3363     bjtmp            = bj + bdiag[i];
3364     batmp            = ba + bdiag[i];
3365     *bjtmp           = i;
3366     *batmp           = diag_tmp; /* rtmp[i]; */
3367     if (*batmp == 0.0) {
3368       *batmp = dt+shift;
3369     }
3370     *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */
3371 
3372     bjtmp = bj + bdiag[i+1]+1;
3373     batmp = ba + bdiag[i+1]+1;
3374     for (k=0; k<ncut; k++) {
3375       bjtmp[k] = jtmp[nzi_bl+1+k];
3376       batmp[k] = vtmp[nzi_bl+1+k];
3377     }
3378 
3379     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3380   } /* for (i=0; i<n; i++) */
3381   PetscCheck(bi[n] < bdiag[n],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT,bi[n],bdiag[n]);
3382 
3383   PetscCall(ISRestoreIndices(isrow,&r));
3384   PetscCall(ISRestoreIndices(isicol,&ic));
3385 
3386   PetscCall(PetscLLDestroy(lnk,lnkbt));
3387   PetscCall(PetscFree2(im,jtmp));
3388   PetscCall(PetscFree2(rtmp,vtmp));
3389   PetscCall(PetscFree(bdiag_rev));
3390 
3391   PetscCall(PetscLogFlops(B->cmap->n));
3392   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3393 
3394   PetscCall(ISIdentity(isrow,&row_identity));
3395   PetscCall(ISIdentity(isicol,&icol_identity));
3396   if (row_identity && icol_identity) {
3397     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3398   } else {
3399     B->ops->solve = MatSolve_SeqAIJ;
3400   }
3401 
3402   B->ops->solveadd          = NULL;
3403   B->ops->solvetranspose    = NULL;
3404   B->ops->solvetransposeadd = NULL;
3405   B->ops->matsolve          = NULL;
3406   B->assembled              = PETSC_TRUE;
3407   B->preallocated           = PETSC_TRUE;
3408   PetscFunctionReturn(0);
3409 }
3410 
3411 /* a wraper of MatILUDTFactor_SeqAIJ() */
3412 /*
3413     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3414 */
3415 
3416 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3417 {
3418   PetscFunctionBegin;
3419   PetscCall(MatILUDTFactor_SeqAIJ(A,row,col,info,&fact));
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 /*
3424    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3425    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3426 */
3427 /*
3428     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3429 */
3430 
3431 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3432 {
3433   Mat            C     =fact;
3434   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3435   IS             isrow = b->row,isicol = b->icol;
3436   const PetscInt *r,*ic,*ics;
3437   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3438   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3439   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3440   PetscReal      dt=info->dt,shift=info->shiftamount;
3441   PetscBool      row_identity, col_identity;
3442 
3443   PetscFunctionBegin;
3444   PetscCall(ISGetIndices(isrow,&r));
3445   PetscCall(ISGetIndices(isicol,&ic));
3446   PetscCall(PetscMalloc1(n+1,&rtmp));
3447   ics  = ic;
3448 
3449   for (i=0; i<n; i++) {
3450     /* initialize rtmp array */
3451     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3452     bjtmp = bj + bi[i];
3453     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3454     rtmp[i] = 0.0;
3455     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3456     bjtmp   = bj + bdiag[i+1] + 1;
3457     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3458 
3459     /* load in initial unfactored row of A */
3460     nz    = ai[r[i]+1] - ai[r[i]];
3461     ajtmp = aj + ai[r[i]];
3462     v     = aa + ai[r[i]];
3463     for (j=0; j<nz; j++) {
3464       rtmp[ics[*ajtmp++]] = v[j];
3465     }
3466 
3467     /* numerical factorization */
3468     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3469     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3470     k     = 0;
3471     while (k < nzl) {
3472       row        = *bjtmp++;
3473       pc         = rtmp + row;
3474       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3475       multiplier = (*pc) * (*pv);
3476       *pc        = multiplier;
3477       if (PetscAbsScalar(multiplier) > dt) {
3478         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3479         pv = b->a + bdiag[row+1] + 1;
3480         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3481         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3482         PetscCall(PetscLogFlops(1+2.0*nz));
3483       }
3484       k++;
3485     }
3486 
3487     /* finished row so stick it into b->a */
3488     /* L-part */
3489     pv  = b->a + bi[i];
3490     pj  = bj + bi[i];
3491     nzl = bi[i+1] - bi[i];
3492     for (j=0; j<nzl; j++) {
3493       pv[j] = rtmp[pj[j]];
3494     }
3495 
3496     /* diagonal: invert diagonal entries for simpler triangular solves */
3497     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3498     b->a[bdiag[i]] = 1.0/rtmp[i];
3499 
3500     /* U-part */
3501     pv  = b->a + bdiag[i+1] + 1;
3502     pj  = bj + bdiag[i+1] + 1;
3503     nzu = bdiag[i] - bdiag[i+1] - 1;
3504     for (j=0; j<nzu; j++) {
3505       pv[j] = rtmp[pj[j]];
3506     }
3507   }
3508 
3509   PetscCall(PetscFree(rtmp));
3510   PetscCall(ISRestoreIndices(isicol,&ic));
3511   PetscCall(ISRestoreIndices(isrow,&r));
3512 
3513   PetscCall(ISIdentity(isrow,&row_identity));
3514   PetscCall(ISIdentity(isicol,&col_identity));
3515   if (row_identity && col_identity) {
3516     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3517   } else {
3518     C->ops->solve = MatSolve_SeqAIJ;
3519   }
3520   C->ops->solveadd          = NULL;
3521   C->ops->solvetranspose    = NULL;
3522   C->ops->solvetransposeadd = NULL;
3523   C->ops->matsolve          = NULL;
3524   C->assembled              = PETSC_TRUE;
3525   C->preallocated           = PETSC_TRUE;
3526 
3527   PetscCall(PetscLogFlops(C->cmap->n));
3528   PetscFunctionReturn(0);
3529 }
3530