xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) {
13   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
14   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
15   const PetscInt    *ai = a->i, *aj = a->j;
16   const PetscScalar *aa = a->a;
17   PetscBool         *done;
18   PetscReal          best, past = 0, future;
19 
20   PetscFunctionBegin;
21   /* pick initial row */
22   best = -1;
23   for (i = 0; i < n; i++) {
24     future = 0.0;
25     for (j = ai[i]; j < ai[i + 1]; j++) {
26       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
27       else past = PetscAbsScalar(aa[j]);
28     }
29     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
30     if (past / future > best) {
31       best    = past / future;
32       current = i;
33     }
34   }
35 
36   PetscCall(PetscMalloc1(n, &done));
37   PetscCall(PetscArrayzero(done, n));
38   PetscCall(PetscMalloc1(n, &order));
39   order[0] = current;
40   for (i = 0; i < n - 1; i++) {
41     done[current] = PETSC_TRUE;
42     best          = -1;
43     /* loop over all neighbors of current pivot */
44     for (j = ai[current]; j < ai[current + 1]; j++) {
45       jj = aj[j];
46       if (done[jj]) continue;
47       /* loop over columns of potential next row computing weights for below and above diagonal */
48       past = future = 0.0;
49       for (k = ai[jj]; k < ai[jj + 1]; k++) {
50         kk = aj[k];
51         if (done[kk]) past += PetscAbsScalar(aa[k]);
52         else if (kk != jj) future += PetscAbsScalar(aa[k]);
53       }
54       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
55       if (past / future > best) {
56         best       = past / future;
57         newcurrent = jj;
58       }
59     }
60     if (best == -1) { /* no neighbors to select from so select best of all that remain */
61       best = -1;
62       for (k = 0; k < n; k++) {
63         if (done[k]) continue;
64         future = 0.0;
65         past   = 0.0;
66         for (j = ai[k]; j < ai[k + 1]; j++) {
67           kk = aj[j];
68           if (done[kk]) past += PetscAbsScalar(aa[j]);
69           else if (kk != k) future += PetscAbsScalar(aa[j]);
70         }
71         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
72         if (past / future > best) {
73           best       = past / future;
74           newcurrent = k;
75         }
76       }
77     }
78     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
79     current      = newcurrent;
80     order[i + 1] = current;
81   }
82   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
83   *icol = *irow;
84   PetscCall(PetscObjectReference((PetscObject)*irow));
85   PetscCall(PetscFree(done));
86   PetscCall(PetscFree(order));
87   PetscFunctionReturn(0);
88 }
89 
90 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) {
91   PetscFunctionBegin;
92   *type = MATSOLVERPETSC;
93   PetscFunctionReturn(0);
94 }
95 
96 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) {
97   PetscInt n = A->rmap->n;
98 
99   PetscFunctionBegin;
100 #if defined(PETSC_USE_COMPLEX)
101   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
102 #endif
103   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
104   PetscCall(MatSetSizes(*B, n, n, n, n));
105   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
106     PetscCall(MatSetType(*B, MATSEQAIJ));
107 
108     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
109     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
110 
111     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
112     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
113     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
114     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
115   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
116     PetscCall(MatSetType(*B, MATSEQSBAIJ));
117     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
118 
119     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
120     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
121     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
122     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
123   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
124   (*B)->factortype = ftype;
125 
126   PetscCall(PetscFree((*B)->solvertype));
127   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
128   (*B)->canuseordering = PETSC_TRUE;
129   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
134   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
135   IS                 isicol;
136   const PetscInt    *r, *ic;
137   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
138   PetscInt          *bi, *bj, *ajtmp;
139   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
140   PetscReal          f;
141   PetscInt           nlnk, *lnk, k, **bi_ptr;
142   PetscFreeSpaceList free_space = NULL, current_space = NULL;
143   PetscBT            lnkbt;
144   PetscBool          missing;
145 
146   PetscFunctionBegin;
147   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
148   PetscCall(MatMissingDiagonal(A, &missing, &i));
149   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
150 
151   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
152   PetscCall(ISGetIndices(isrow, &r));
153   PetscCall(ISGetIndices(isicol, &ic));
154 
155   /* get new row pointers */
156   PetscCall(PetscMalloc1(n + 1, &bi));
157   bi[0] = 0;
158 
159   /* bdiag is location of diagonal in factor */
160   PetscCall(PetscMalloc1(n + 1, &bdiag));
161   bdiag[0] = 0;
162 
163   /* linked list for storing column indices of the active row */
164   nlnk = n + 1;
165   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
166 
167   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
168 
169   /* initial FreeSpace size is f*(ai[n]+1) */
170   f = info->fill;
171   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
172   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
173   current_space = free_space;
174 
175   for (i = 0; i < n; i++) {
176     /* copy previous fill into linked list */
177     nzi   = 0;
178     nnz   = ai[r[i] + 1] - ai[r[i]];
179     ajtmp = aj + ai[r[i]];
180     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
181     nzi += nlnk;
182 
183     /* add pivot rows into linked list */
184     row = lnk[n];
185     while (row < i) {
186       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
188       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
189       nzi += nlnk;
190       row = lnk[row];
191     }
192     bi[i + 1] = bi[i] + nzi;
193     im[i]     = nzi;
194 
195     /* mark bdiag */
196     nzbd = 0;
197     nnz  = nzi;
198     k    = lnk[n];
199     while (nnz-- && k < i) {
200       nzbd++;
201       k = lnk[k];
202     }
203     bdiag[i] = bi[i] + nzbd;
204 
205     /* if free space is not available, make more free space */
206     if (current_space->local_remaining < nzi) {
207       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
208       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
209       reallocs++;
210     }
211 
212     /* copy data into free space, then initialize lnk */
213     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
214 
215     bi_ptr[i] = current_space->array;
216     current_space->array += nzi;
217     current_space->local_used += nzi;
218     current_space->local_remaining -= nzi;
219   }
220 #if defined(PETSC_USE_INFO)
221   if (ai[n] != 0) {
222     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
223     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
224     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
225     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
226     PetscCall(PetscInfo(A, "for best performance.\n"));
227   } else {
228     PetscCall(PetscInfo(A, "Empty matrix\n"));
229   }
230 #endif
231 
232   PetscCall(ISRestoreIndices(isrow, &r));
233   PetscCall(ISRestoreIndices(isicol, &ic));
234 
235   /* destroy list of free space and other temporary array(s) */
236   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
237   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
238   PetscCall(PetscLLDestroy(lnk, lnkbt));
239   PetscCall(PetscFree2(bi_ptr, im));
240 
241   /* put together the new matrix */
242   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
243   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
244   b = (Mat_SeqAIJ *)(B)->data;
245 
246   b->free_a       = PETSC_TRUE;
247   b->free_ij      = PETSC_TRUE;
248   b->singlemalloc = PETSC_FALSE;
249 
250   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
251   b->j    = bj;
252   b->i    = bi;
253   b->diag = bdiag;
254   b->ilen = NULL;
255   b->imax = NULL;
256   b->row  = isrow;
257   b->col  = iscol;
258   PetscCall(PetscObjectReference((PetscObject)isrow));
259   PetscCall(PetscObjectReference((PetscObject)iscol));
260   b->icol = isicol;
261   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
262 
263   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
264   PetscCall(PetscLogObjectMemory((PetscObject)B, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar))));
265   b->maxnz = b->nz = bi[n];
266 
267   (B)->factortype            = MAT_FACTOR_LU;
268   (B)->info.factor_mallocs   = reallocs;
269   (B)->info.fill_ratio_given = f;
270 
271   if (ai[n]) {
272     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
273   } else {
274     (B)->info.fill_ratio_needed = 0.0;
275   }
276   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
277   if (a->inode.size) { (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; }
278   PetscFunctionReturn(0);
279 }
280 
281 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
282   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
283   IS                 isicol;
284   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
285   PetscInt           i, n = A->rmap->n;
286   PetscInt          *bi, *bj;
287   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
288   PetscReal          f;
289   PetscInt           nlnk, *lnk, k, **bi_ptr;
290   PetscFreeSpaceList free_space = NULL, current_space = NULL;
291   PetscBT            lnkbt;
292   PetscBool          missing;
293 
294   PetscFunctionBegin;
295   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
296   PetscCall(MatMissingDiagonal(A, &missing, &i));
297   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
298 
299   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
300   PetscCall(ISGetIndices(isrow, &r));
301   PetscCall(ISGetIndices(isicol, &ic));
302 
303   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
304   PetscCall(PetscMalloc1(n + 1, &bi));
305   PetscCall(PetscMalloc1(n + 1, &bdiag));
306   bi[0] = bdiag[0] = 0;
307 
308   /* linked list for storing column indices of the active row */
309   nlnk = n + 1;
310   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
311 
312   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
313 
314   /* initial FreeSpace size is f*(ai[n]+1) */
315   f = info->fill;
316   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
317   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
318   current_space = free_space;
319 
320   for (i = 0; i < n; i++) {
321     /* copy previous fill into linked list */
322     nzi   = 0;
323     nnz   = ai[r[i] + 1] - ai[r[i]];
324     ajtmp = aj + ai[r[i]];
325     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
326     nzi += nlnk;
327 
328     /* add pivot rows into linked list */
329     row = lnk[n];
330     while (row < i) {
331       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
332       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
333       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
334       nzi += nlnk;
335       row = lnk[row];
336     }
337     bi[i + 1] = bi[i] + nzi;
338     im[i]     = nzi;
339 
340     /* mark bdiag */
341     nzbd = 0;
342     nnz  = nzi;
343     k    = lnk[n];
344     while (nnz-- && k < i) {
345       nzbd++;
346       k = lnk[k];
347     }
348     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
349 
350     /* if free space is not available, make more free space */
351     if (current_space->local_remaining < nzi) {
352       /* estimated additional space needed */
353       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
354       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
355       reallocs++;
356     }
357 
358     /* copy data into free space, then initialize lnk */
359     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
360 
361     bi_ptr[i] = current_space->array;
362     current_space->array += nzi;
363     current_space->local_used += nzi;
364     current_space->local_remaining -= nzi;
365   }
366 
367   PetscCall(ISRestoreIndices(isrow, &r));
368   PetscCall(ISRestoreIndices(isicol, &ic));
369 
370   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
371   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
372   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
373   PetscCall(PetscLLDestroy(lnk, lnkbt));
374   PetscCall(PetscFree2(bi_ptr, im));
375 
376   /* put together the new matrix */
377   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
378   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
379   b = (Mat_SeqAIJ *)(B)->data;
380 
381   b->free_a       = PETSC_TRUE;
382   b->free_ij      = PETSC_TRUE;
383   b->singlemalloc = PETSC_FALSE;
384 
385   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
386 
387   b->j    = bj;
388   b->i    = bi;
389   b->diag = bdiag;
390   b->ilen = NULL;
391   b->imax = NULL;
392   b->row  = isrow;
393   b->col  = iscol;
394   PetscCall(PetscObjectReference((PetscObject)isrow));
395   PetscCall(PetscObjectReference((PetscObject)iscol));
396   b->icol = isicol;
397   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
398 
399   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
400   PetscCall(PetscLogObjectMemory((PetscObject)B, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar))));
401   b->maxnz = b->nz = bdiag[0] + 1;
402 
403   B->factortype            = MAT_FACTOR_LU;
404   B->info.factor_mallocs   = reallocs;
405   B->info.fill_ratio_given = f;
406 
407   if (ai[n]) {
408     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
409   } else {
410     B->info.fill_ratio_needed = 0.0;
411   }
412 #if defined(PETSC_USE_INFO)
413   if (ai[n] != 0) {
414     PetscReal af = B->info.fill_ratio_needed;
415     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
416     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
417     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
418     PetscCall(PetscInfo(A, "for best performance.\n"));
419   } else {
420     PetscCall(PetscInfo(A, "Empty matrix\n"));
421   }
422 #endif
423   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
424   if (a->inode.size) { B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; }
425   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
426   PetscFunctionReturn(0);
427 }
428 
429 /*
430     Trouble in factorization, should we dump the original matrix?
431 */
432 PetscErrorCode MatFactorDumpMatrix(Mat A) {
433   PetscBool flg = PETSC_FALSE;
434 
435   PetscFunctionBegin;
436   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
437   if (flg) {
438     PetscViewer viewer;
439     char        filename[PETSC_MAX_PATH_LEN];
440 
441     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
442     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
443     PetscCall(MatView(A, viewer));
444     PetscCall(PetscViewerDestroy(&viewer));
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
450   Mat              C = B;
451   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
452   IS               isrow = b->row, isicol = b->icol;
453   const PetscInt  *r, *ic, *ics;
454   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
455   PetscInt         i, j, k, nz, nzL, row, *pj;
456   const PetscInt  *ajtmp, *bjtmp;
457   MatScalar       *rtmp, *pc, multiplier, *pv;
458   const MatScalar *aa = a->a, *v;
459   PetscBool        row_identity, col_identity;
460   FactorShiftCtx   sctx;
461   const PetscInt  *ddiag;
462   PetscReal        rs;
463   MatScalar        d;
464 
465   PetscFunctionBegin;
466   /* MatPivotSetUp(): initialize shift context sctx */
467   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
468 
469   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
470     ddiag          = a->diag;
471     sctx.shift_top = info->zeropivot;
472     for (i = 0; i < n; i++) {
473       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
474       d  = (aa)[ddiag[i]];
475       rs = -PetscAbsScalar(d) - PetscRealPart(d);
476       v  = aa + ai[i];
477       nz = ai[i + 1] - ai[i];
478       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
479       if (rs > sctx.shift_top) sctx.shift_top = rs;
480     }
481     sctx.shift_top *= 1.1;
482     sctx.nshift_max = 5;
483     sctx.shift_lo   = 0.;
484     sctx.shift_hi   = 1.;
485   }
486 
487   PetscCall(ISGetIndices(isrow, &r));
488   PetscCall(ISGetIndices(isicol, &ic));
489   PetscCall(PetscMalloc1(n + 1, &rtmp));
490   ics = ic;
491 
492   do {
493     sctx.newshift = PETSC_FALSE;
494     for (i = 0; i < n; i++) {
495       /* zero rtmp */
496       /* L part */
497       nz    = bi[i + 1] - bi[i];
498       bjtmp = bj + bi[i];
499       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
500 
501       /* U part */
502       nz    = bdiag[i] - bdiag[i + 1];
503       bjtmp = bj + bdiag[i + 1] + 1;
504       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
505 
506       /* load in initial (unfactored row) */
507       nz    = ai[r[i] + 1] - ai[r[i]];
508       ajtmp = aj + ai[r[i]];
509       v     = aa + ai[r[i]];
510       for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; }
511       /* ZeropivotApply() */
512       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
513 
514       /* elimination */
515       bjtmp = bj + bi[i];
516       row   = *bjtmp++;
517       nzL   = bi[i + 1] - bi[i];
518       for (k = 0; k < nzL; k++) {
519         pc = rtmp + row;
520         if (*pc != 0.0) {
521           pv         = b->a + bdiag[row];
522           multiplier = *pc * (*pv);
523           *pc        = multiplier;
524 
525           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
526           pv = b->a + bdiag[row + 1] + 1;
527           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
528 
529           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
530           PetscCall(PetscLogFlops(1 + 2.0 * nz));
531         }
532         row = *bjtmp++;
533       }
534 
535       /* finished row so stick it into b->a */
536       rs = 0.0;
537       /* L part */
538       pv = b->a + bi[i];
539       pj = b->j + bi[i];
540       nz = bi[i + 1] - bi[i];
541       for (j = 0; j < nz; j++) {
542         pv[j] = rtmp[pj[j]];
543         rs += PetscAbsScalar(pv[j]);
544       }
545 
546       /* U part */
547       pv = b->a + bdiag[i + 1] + 1;
548       pj = b->j + bdiag[i + 1] + 1;
549       nz = bdiag[i] - bdiag[i + 1] - 1;
550       for (j = 0; j < nz; j++) {
551         pv[j] = rtmp[pj[j]];
552         rs += PetscAbsScalar(pv[j]);
553       }
554 
555       sctx.rs = rs;
556       sctx.pv = rtmp[i];
557       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
558       if (sctx.newshift) break; /* break for-loop */
559       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
560 
561       /* Mark diagonal and invert diagonal for simpler triangular solves */
562       pv  = b->a + bdiag[i];
563       *pv = 1.0 / rtmp[i];
564 
565     } /* endof for (i=0; i<n; i++) { */
566 
567     /* MatPivotRefine() */
568     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
569       /*
570        * if no shift in this attempt & shifting & started shifting & can refine,
571        * then try lower shift
572        */
573       sctx.shift_hi       = sctx.shift_fraction;
574       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
575       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
576       sctx.newshift       = PETSC_TRUE;
577       sctx.nshift++;
578     }
579   } while (sctx.newshift);
580 
581   PetscCall(PetscFree(rtmp));
582   PetscCall(ISRestoreIndices(isicol, &ic));
583   PetscCall(ISRestoreIndices(isrow, &r));
584 
585   PetscCall(ISIdentity(isrow, &row_identity));
586   PetscCall(ISIdentity(isicol, &col_identity));
587   if (b->inode.size) {
588     C->ops->solve = MatSolve_SeqAIJ_Inode;
589   } else if (row_identity && col_identity) {
590     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
591   } else {
592     C->ops->solve = MatSolve_SeqAIJ;
593   }
594   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
595   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
596   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
597   C->ops->matsolve          = MatMatSolve_SeqAIJ;
598   C->assembled              = PETSC_TRUE;
599   C->preallocated           = PETSC_TRUE;
600 
601   PetscCall(PetscLogFlops(C->cmap->n));
602 
603   /* MatShiftView(A,info,&sctx) */
604   if (sctx.nshift) {
605     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
606       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
607     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
608       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
609     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
610       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
611     }
612   }
613   PetscFunctionReturn(0);
614 }
615 
616 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
617   Mat              C = B;
618   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
619   IS               isrow = b->row, isicol = b->icol;
620   const PetscInt  *r, *ic, *ics;
621   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
622   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
623   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
624   MatScalar       *pv, *rtmp, *pc, multiplier, d;
625   const MatScalar *v, *aa = a->a;
626   PetscReal        rs = 0.0;
627   FactorShiftCtx   sctx;
628   const PetscInt  *ddiag;
629   PetscBool        row_identity, col_identity;
630 
631   PetscFunctionBegin;
632   /* MatPivotSetUp(): initialize shift context sctx */
633   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
634 
635   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
636     ddiag          = a->diag;
637     sctx.shift_top = info->zeropivot;
638     for (i = 0; i < n; i++) {
639       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
640       d  = (aa)[ddiag[i]];
641       rs = -PetscAbsScalar(d) - PetscRealPart(d);
642       v  = aa + ai[i];
643       nz = ai[i + 1] - ai[i];
644       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
645       if (rs > sctx.shift_top) sctx.shift_top = rs;
646     }
647     sctx.shift_top *= 1.1;
648     sctx.nshift_max = 5;
649     sctx.shift_lo   = 0.;
650     sctx.shift_hi   = 1.;
651   }
652 
653   PetscCall(ISGetIndices(isrow, &r));
654   PetscCall(ISGetIndices(isicol, &ic));
655   PetscCall(PetscMalloc1(n + 1, &rtmp));
656   ics = ic;
657 
658   do {
659     sctx.newshift = PETSC_FALSE;
660     for (i = 0; i < n; i++) {
661       nz    = bi[i + 1] - bi[i];
662       bjtmp = bj + bi[i];
663       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
664 
665       /* load in initial (unfactored row) */
666       nz    = ai[r[i] + 1] - ai[r[i]];
667       ajtmp = aj + ai[r[i]];
668       v     = aa + ai[r[i]];
669       for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; }
670       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
671 
672       row = *bjtmp++;
673       while (row < i) {
674         pc = rtmp + row;
675         if (*pc != 0.0) {
676           pv         = b->a + diag_offset[row];
677           pj         = b->j + diag_offset[row] + 1;
678           multiplier = *pc / *pv++;
679           *pc        = multiplier;
680           nz         = bi[row + 1] - diag_offset[row] - 1;
681           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
682           PetscCall(PetscLogFlops(1 + 2.0 * nz));
683         }
684         row = *bjtmp++;
685       }
686       /* finished row so stick it into b->a */
687       pv   = b->a + bi[i];
688       pj   = b->j + bi[i];
689       nz   = bi[i + 1] - bi[i];
690       diag = diag_offset[i] - bi[i];
691       rs   = 0.0;
692       for (j = 0; j < nz; j++) {
693         pv[j] = rtmp[pj[j]];
694         rs += PetscAbsScalar(pv[j]);
695       }
696       rs -= PetscAbsScalar(pv[diag]);
697 
698       sctx.rs = rs;
699       sctx.pv = pv[diag];
700       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
701       if (sctx.newshift) break;
702       pv[diag] = sctx.pv;
703     }
704 
705     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
706       /*
707        * if no shift in this attempt & shifting & started shifting & can refine,
708        * then try lower shift
709        */
710       sctx.shift_hi       = sctx.shift_fraction;
711       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
712       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
713       sctx.newshift       = PETSC_TRUE;
714       sctx.nshift++;
715     }
716   } while (sctx.newshift);
717 
718   /* invert diagonal entries for simpler triangular solves */
719   for (i = 0; i < n; i++) { b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; }
720   PetscCall(PetscFree(rtmp));
721   PetscCall(ISRestoreIndices(isicol, &ic));
722   PetscCall(ISRestoreIndices(isrow, &r));
723 
724   PetscCall(ISIdentity(isrow, &row_identity));
725   PetscCall(ISIdentity(isicol, &col_identity));
726   if (row_identity && col_identity) {
727     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
728   } else {
729     C->ops->solve = MatSolve_SeqAIJ_inplace;
730   }
731   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
732   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
733   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
734   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
735 
736   C->assembled    = PETSC_TRUE;
737   C->preallocated = PETSC_TRUE;
738 
739   PetscCall(PetscLogFlops(C->cmap->n));
740   if (sctx.nshift) {
741     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
742       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
743     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
744       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
745     }
746   }
747   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
748   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
749 
750   PetscCall(MatSeqAIJCheckInode(C));
751   PetscFunctionReturn(0);
752 }
753 
754 /*
755    This routine implements inplace ILU(0) with row or/and column permutations.
756    Input:
757      A - original matrix
758    Output;
759      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
760          a->j (col index) is permuted by the inverse of colperm, then sorted
761          a->a reordered accordingly with a->j
762          a->diag (ptr to diagonal elements) is updated.
763 */
764 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) {
765   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
766   IS               isrow = a->row, isicol = a->icol;
767   const PetscInt  *r, *ic, *ics;
768   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
769   PetscInt        *ajtmp, nz, row;
770   PetscInt        *diag = a->diag, nbdiag, *pj;
771   PetscScalar     *rtmp, *pc, multiplier, d;
772   MatScalar       *pv, *v;
773   PetscReal        rs;
774   FactorShiftCtx   sctx;
775   const MatScalar *aa = a->a, *vtmp;
776 
777   PetscFunctionBegin;
778   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
779 
780   /* MatPivotSetUp(): initialize shift context sctx */
781   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
782 
783   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
784     const PetscInt *ddiag = a->diag;
785     sctx.shift_top        = info->zeropivot;
786     for (i = 0; i < n; i++) {
787       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
788       d    = (aa)[ddiag[i]];
789       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
790       vtmp = aa + ai[i];
791       nz   = ai[i + 1] - ai[i];
792       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
793       if (rs > sctx.shift_top) sctx.shift_top = rs;
794     }
795     sctx.shift_top *= 1.1;
796     sctx.nshift_max = 5;
797     sctx.shift_lo   = 0.;
798     sctx.shift_hi   = 1.;
799   }
800 
801   PetscCall(ISGetIndices(isrow, &r));
802   PetscCall(ISGetIndices(isicol, &ic));
803   PetscCall(PetscMalloc1(n + 1, &rtmp));
804   PetscCall(PetscArrayzero(rtmp, n + 1));
805   ics = ic;
806 
807 #if defined(MV)
808   sctx.shift_top      = 0.;
809   sctx.nshift_max     = 0;
810   sctx.shift_lo       = 0.;
811   sctx.shift_hi       = 0.;
812   sctx.shift_fraction = 0.;
813 
814   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
815     sctx.shift_top = 0.;
816     for (i = 0; i < n; i++) {
817       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
818       d  = (a->a)[diag[i]];
819       rs = -PetscAbsScalar(d) - PetscRealPart(d);
820       v  = a->a + ai[i];
821       nz = ai[i + 1] - ai[i];
822       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
823       if (rs > sctx.shift_top) sctx.shift_top = rs;
824     }
825     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
826     sctx.shift_top *= 1.1;
827     sctx.nshift_max = 5;
828     sctx.shift_lo   = 0.;
829     sctx.shift_hi   = 1.;
830   }
831 
832   sctx.shift_amount = 0.;
833   sctx.nshift       = 0;
834 #endif
835 
836   do {
837     sctx.newshift = PETSC_FALSE;
838     for (i = 0; i < n; i++) {
839       /* load in initial unfactored row */
840       nz    = ai[r[i] + 1] - ai[r[i]];
841       ajtmp = aj + ai[r[i]];
842       v     = a->a + ai[r[i]];
843       /* sort permuted ajtmp and values v accordingly */
844       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
845       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
846 
847       diag[r[i]] = ai[r[i]];
848       for (j = 0; j < nz; j++) {
849         rtmp[ajtmp[j]] = v[j];
850         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
851       }
852       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
853 
854       row = *ajtmp++;
855       while (row < i) {
856         pc = rtmp + row;
857         if (*pc != 0.0) {
858           pv = a->a + diag[r[row]];
859           pj = aj + diag[r[row]] + 1;
860 
861           multiplier = *pc / *pv++;
862           *pc        = multiplier;
863           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
864           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
865           PetscCall(PetscLogFlops(1 + 2.0 * nz));
866         }
867         row = *ajtmp++;
868       }
869       /* finished row so overwrite it onto a->a */
870       pv     = a->a + ai[r[i]];
871       pj     = aj + ai[r[i]];
872       nz     = ai[r[i] + 1] - ai[r[i]];
873       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
874 
875       rs = 0.0;
876       for (j = 0; j < nz; j++) {
877         pv[j] = rtmp[pj[j]];
878         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
879       }
880 
881       sctx.rs = rs;
882       sctx.pv = pv[nbdiag];
883       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
884       if (sctx.newshift) break;
885       pv[nbdiag] = sctx.pv;
886     }
887 
888     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
889       /*
890        * if no shift in this attempt & shifting & started shifting & can refine,
891        * then try lower shift
892        */
893       sctx.shift_hi       = sctx.shift_fraction;
894       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
895       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
896       sctx.newshift       = PETSC_TRUE;
897       sctx.nshift++;
898     }
899   } while (sctx.newshift);
900 
901   /* invert diagonal entries for simpler triangular solves */
902   for (i = 0; i < n; i++) { a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; }
903 
904   PetscCall(PetscFree(rtmp));
905   PetscCall(ISRestoreIndices(isicol, &ic));
906   PetscCall(ISRestoreIndices(isrow, &r));
907 
908   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
909   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
910   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
911   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
912 
913   A->assembled    = PETSC_TRUE;
914   A->preallocated = PETSC_TRUE;
915 
916   PetscCall(PetscLogFlops(A->cmap->n));
917   if (sctx.nshift) {
918     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
919       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
920     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
921       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
922     }
923   }
924   PetscFunctionReturn(0);
925 }
926 
927 /* ----------------------------------------------------------- */
928 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) {
929   Mat C;
930 
931   PetscFunctionBegin;
932   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
933   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
934   PetscCall(MatLUFactorNumeric(C, A, info));
935 
936   A->ops->solve          = C->ops->solve;
937   A->ops->solvetranspose = C->ops->solvetranspose;
938 
939   PetscCall(MatHeaderMerge(A, &C));
940   PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)((Mat_SeqAIJ *)(A->data))->icol));
941   PetscFunctionReturn(0);
942 }
943 /* ----------------------------------------------------------- */
944 
945 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
946   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
947   IS                 iscol = a->col, isrow = a->row;
948   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
949   PetscInt           nz;
950   const PetscInt    *rout, *cout, *r, *c;
951   PetscScalar       *x, *tmp, *tmps, sum;
952   const PetscScalar *b;
953   const MatScalar   *aa = a->a, *v;
954 
955   PetscFunctionBegin;
956   if (!n) PetscFunctionReturn(0);
957 
958   PetscCall(VecGetArrayRead(bb, &b));
959   PetscCall(VecGetArrayWrite(xx, &x));
960   tmp = a->solve_work;
961 
962   PetscCall(ISGetIndices(isrow, &rout));
963   r = rout;
964   PetscCall(ISGetIndices(iscol, &cout));
965   c = cout + (n - 1);
966 
967   /* forward solve the lower triangular */
968   tmp[0] = b[*r++];
969   tmps   = tmp;
970   for (i = 1; i < n; i++) {
971     v   = aa + ai[i];
972     vi  = aj + ai[i];
973     nz  = a->diag[i] - ai[i];
974     sum = b[*r++];
975     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
976     tmp[i] = sum;
977   }
978 
979   /* backward solve the upper triangular */
980   for (i = n - 1; i >= 0; i--) {
981     v   = aa + a->diag[i] + 1;
982     vi  = aj + a->diag[i] + 1;
983     nz  = ai[i + 1] - a->diag[i] - 1;
984     sum = tmp[i];
985     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
986     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
987   }
988 
989   PetscCall(ISRestoreIndices(isrow, &rout));
990   PetscCall(ISRestoreIndices(iscol, &cout));
991   PetscCall(VecRestoreArrayRead(bb, &b));
992   PetscCall(VecRestoreArrayWrite(xx, &x));
993   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) {
998   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
999   IS                 iscol = a->col, isrow = a->row;
1000   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1001   PetscInt           nz, neq, ldb, ldx;
1002   const PetscInt    *rout, *cout, *r, *c;
1003   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1004   const PetscScalar *b, *aa  = a->a, *v;
1005   PetscBool          isdense;
1006 
1007   PetscFunctionBegin;
1008   if (!n) PetscFunctionReturn(0);
1009   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1010   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1011   if (X != B) {
1012     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1013     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1014   }
1015   PetscCall(MatDenseGetArrayRead(B, &b));
1016   PetscCall(MatDenseGetLDA(B, &ldb));
1017   PetscCall(MatDenseGetArray(X, &x));
1018   PetscCall(MatDenseGetLDA(X, &ldx));
1019   PetscCall(ISGetIndices(isrow, &rout));
1020   r = rout;
1021   PetscCall(ISGetIndices(iscol, &cout));
1022   c = cout;
1023   for (neq = 0; neq < B->cmap->n; neq++) {
1024     /* forward solve the lower triangular */
1025     tmp[0] = b[r[0]];
1026     tmps   = tmp;
1027     for (i = 1; i < n; i++) {
1028       v   = aa + ai[i];
1029       vi  = aj + ai[i];
1030       nz  = a->diag[i] - ai[i];
1031       sum = b[r[i]];
1032       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1033       tmp[i] = sum;
1034     }
1035     /* backward solve the upper triangular */
1036     for (i = n - 1; i >= 0; i--) {
1037       v   = aa + a->diag[i] + 1;
1038       vi  = aj + a->diag[i] + 1;
1039       nz  = ai[i + 1] - a->diag[i] - 1;
1040       sum = tmp[i];
1041       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1042       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1043     }
1044     b += ldb;
1045     x += ldx;
1046   }
1047   PetscCall(ISRestoreIndices(isrow, &rout));
1048   PetscCall(ISRestoreIndices(iscol, &cout));
1049   PetscCall(MatDenseRestoreArrayRead(B, &b));
1050   PetscCall(MatDenseRestoreArray(X, &x));
1051   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) {
1056   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1057   IS                 iscol = a->col, isrow = a->row;
1058   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1059   PetscInt           nz, neq, ldb, ldx;
1060   const PetscInt    *rout, *cout, *r, *c;
1061   PetscScalar       *x, *tmp = a->solve_work, sum;
1062   const PetscScalar *b, *aa  = a->a, *v;
1063   PetscBool          isdense;
1064 
1065   PetscFunctionBegin;
1066   if (!n) PetscFunctionReturn(0);
1067   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1068   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1069   if (X != B) {
1070     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1071     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1072   }
1073   PetscCall(MatDenseGetArrayRead(B, &b));
1074   PetscCall(MatDenseGetLDA(B, &ldb));
1075   PetscCall(MatDenseGetArray(X, &x));
1076   PetscCall(MatDenseGetLDA(X, &ldx));
1077   PetscCall(ISGetIndices(isrow, &rout));
1078   r = rout;
1079   PetscCall(ISGetIndices(iscol, &cout));
1080   c = cout;
1081   for (neq = 0; neq < B->cmap->n; neq++) {
1082     /* forward solve the lower triangular */
1083     tmp[0] = b[r[0]];
1084     v      = aa;
1085     vi     = aj;
1086     for (i = 1; i < n; i++) {
1087       nz  = ai[i + 1] - ai[i];
1088       sum = b[r[i]];
1089       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1090       tmp[i] = sum;
1091       v += nz;
1092       vi += nz;
1093     }
1094     /* backward solve the upper triangular */
1095     for (i = n - 1; i >= 0; i--) {
1096       v   = aa + adiag[i + 1] + 1;
1097       vi  = aj + adiag[i + 1] + 1;
1098       nz  = adiag[i] - adiag[i + 1] - 1;
1099       sum = tmp[i];
1100       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1101       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1102     }
1103     b += ldb;
1104     x += ldx;
1105   }
1106   PetscCall(ISRestoreIndices(isrow, &rout));
1107   PetscCall(ISRestoreIndices(iscol, &cout));
1108   PetscCall(MatDenseRestoreArrayRead(B, &b));
1109   PetscCall(MatDenseRestoreArray(X, &x));
1110   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) {
1115   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1116   IS                 iscol = a->col, isrow = a->row;
1117   const PetscInt    *r, *c, *rout, *cout;
1118   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1119   PetscInt           nz, row;
1120   PetscScalar       *x, *tmp, *tmps, sum;
1121   const PetscScalar *b;
1122   const MatScalar   *aa = a->a, *v;
1123 
1124   PetscFunctionBegin;
1125   if (!n) PetscFunctionReturn(0);
1126 
1127   PetscCall(VecGetArrayRead(bb, &b));
1128   PetscCall(VecGetArrayWrite(xx, &x));
1129   tmp = a->solve_work;
1130 
1131   PetscCall(ISGetIndices(isrow, &rout));
1132   r = rout;
1133   PetscCall(ISGetIndices(iscol, &cout));
1134   c = cout + (n - 1);
1135 
1136   /* forward solve the lower triangular */
1137   tmp[0] = b[*r++];
1138   tmps   = tmp;
1139   for (row = 1; row < n; row++) {
1140     i   = rout[row]; /* permuted row */
1141     v   = aa + ai[i];
1142     vi  = aj + ai[i];
1143     nz  = a->diag[i] - ai[i];
1144     sum = b[*r++];
1145     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1146     tmp[row] = sum;
1147   }
1148 
1149   /* backward solve the upper triangular */
1150   for (row = n - 1; row >= 0; row--) {
1151     i   = rout[row]; /* permuted row */
1152     v   = aa + a->diag[i] + 1;
1153     vi  = aj + a->diag[i] + 1;
1154     nz  = ai[i + 1] - a->diag[i] - 1;
1155     sum = tmp[row];
1156     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1157     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1158   }
1159 
1160   PetscCall(ISRestoreIndices(isrow, &rout));
1161   PetscCall(ISRestoreIndices(iscol, &cout));
1162   PetscCall(VecRestoreArrayRead(bb, &b));
1163   PetscCall(VecRestoreArrayWrite(xx, &x));
1164   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 /* ----------------------------------------------------------- */
1169 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1170 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) {
1171   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1172   PetscInt           n  = A->rmap->n;
1173   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1174   PetscScalar       *x;
1175   const PetscScalar *b;
1176   const MatScalar   *aa = a->a;
1177 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1178   PetscInt         adiag_i, i, nz, ai_i;
1179   const PetscInt  *vi;
1180   const MatScalar *v;
1181   PetscScalar      sum;
1182 #endif
1183 
1184   PetscFunctionBegin;
1185   if (!n) PetscFunctionReturn(0);
1186 
1187   PetscCall(VecGetArrayRead(bb, &b));
1188   PetscCall(VecGetArrayWrite(xx, &x));
1189 
1190 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1191   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1192 #else
1193   /* forward solve the lower triangular */
1194   x[0] = b[0];
1195   for (i = 1; i < n; i++) {
1196     ai_i = ai[i];
1197     v    = aa + ai_i;
1198     vi   = aj + ai_i;
1199     nz   = adiag[i] - ai_i;
1200     sum  = b[i];
1201     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1202     x[i] = sum;
1203   }
1204 
1205   /* backward solve the upper triangular */
1206   for (i = n - 1; i >= 0; i--) {
1207     adiag_i = adiag[i];
1208     v       = aa + adiag_i + 1;
1209     vi      = aj + adiag_i + 1;
1210     nz      = ai[i + 1] - adiag_i - 1;
1211     sum     = x[i];
1212     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1213     x[i] = sum * aa[adiag_i];
1214   }
1215 #endif
1216   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1217   PetscCall(VecRestoreArrayRead(bb, &b));
1218   PetscCall(VecRestoreArrayWrite(xx, &x));
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) {
1223   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1224   IS                 iscol = a->col, isrow = a->row;
1225   PetscInt           i, n                  = A->rmap->n, j;
1226   PetscInt           nz;
1227   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1228   PetscScalar       *x, *tmp, sum;
1229   const PetscScalar *b;
1230   const MatScalar   *aa = a->a, *v;
1231 
1232   PetscFunctionBegin;
1233   if (yy != xx) PetscCall(VecCopy(yy, xx));
1234 
1235   PetscCall(VecGetArrayRead(bb, &b));
1236   PetscCall(VecGetArray(xx, &x));
1237   tmp = a->solve_work;
1238 
1239   PetscCall(ISGetIndices(isrow, &rout));
1240   r = rout;
1241   PetscCall(ISGetIndices(iscol, &cout));
1242   c = cout + (n - 1);
1243 
1244   /* forward solve the lower triangular */
1245   tmp[0] = b[*r++];
1246   for (i = 1; i < n; i++) {
1247     v   = aa + ai[i];
1248     vi  = aj + ai[i];
1249     nz  = a->diag[i] - ai[i];
1250     sum = b[*r++];
1251     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1252     tmp[i] = sum;
1253   }
1254 
1255   /* backward solve the upper triangular */
1256   for (i = n - 1; i >= 0; i--) {
1257     v   = aa + a->diag[i] + 1;
1258     vi  = aj + a->diag[i] + 1;
1259     nz  = ai[i + 1] - a->diag[i] - 1;
1260     sum = tmp[i];
1261     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1262     tmp[i] = sum * aa[a->diag[i]];
1263     x[*c--] += tmp[i];
1264   }
1265 
1266   PetscCall(ISRestoreIndices(isrow, &rout));
1267   PetscCall(ISRestoreIndices(iscol, &cout));
1268   PetscCall(VecRestoreArrayRead(bb, &b));
1269   PetscCall(VecRestoreArray(xx, &x));
1270   PetscCall(PetscLogFlops(2.0 * a->nz));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) {
1275   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1276   IS                 iscol = a->col, isrow = a->row;
1277   PetscInt           i, n                  = A->rmap->n, j;
1278   PetscInt           nz;
1279   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1280   PetscScalar       *x, *tmp, sum;
1281   const PetscScalar *b;
1282   const MatScalar   *aa = a->a, *v;
1283 
1284   PetscFunctionBegin;
1285   if (yy != xx) PetscCall(VecCopy(yy, xx));
1286 
1287   PetscCall(VecGetArrayRead(bb, &b));
1288   PetscCall(VecGetArray(xx, &x));
1289   tmp = a->solve_work;
1290 
1291   PetscCall(ISGetIndices(isrow, &rout));
1292   r = rout;
1293   PetscCall(ISGetIndices(iscol, &cout));
1294   c = cout;
1295 
1296   /* forward solve the lower triangular */
1297   tmp[0] = b[r[0]];
1298   v      = aa;
1299   vi     = aj;
1300   for (i = 1; i < n; i++) {
1301     nz  = ai[i + 1] - ai[i];
1302     sum = b[r[i]];
1303     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1304     tmp[i] = sum;
1305     v += nz;
1306     vi += nz;
1307   }
1308 
1309   /* backward solve the upper triangular */
1310   v  = aa + adiag[n - 1];
1311   vi = aj + adiag[n - 1];
1312   for (i = n - 1; i >= 0; i--) {
1313     nz  = adiag[i] - adiag[i + 1] - 1;
1314     sum = tmp[i];
1315     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1316     tmp[i] = sum * v[nz];
1317     x[c[i]] += tmp[i];
1318     v += nz + 1;
1319     vi += nz + 1;
1320   }
1321 
1322   PetscCall(ISRestoreIndices(isrow, &rout));
1323   PetscCall(ISRestoreIndices(iscol, &cout));
1324   PetscCall(VecRestoreArrayRead(bb, &b));
1325   PetscCall(VecRestoreArray(xx, &x));
1326   PetscCall(PetscLogFlops(2.0 * a->nz));
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) {
1331   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1332   IS                 iscol = a->col, isrow = a->row;
1333   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1334   PetscInt           i, n = A->rmap->n, j;
1335   PetscInt           nz;
1336   PetscScalar       *x, *tmp, s1;
1337   const MatScalar   *aa = a->a, *v;
1338   const PetscScalar *b;
1339 
1340   PetscFunctionBegin;
1341   PetscCall(VecGetArrayRead(bb, &b));
1342   PetscCall(VecGetArrayWrite(xx, &x));
1343   tmp = a->solve_work;
1344 
1345   PetscCall(ISGetIndices(isrow, &rout));
1346   r = rout;
1347   PetscCall(ISGetIndices(iscol, &cout));
1348   c = cout;
1349 
1350   /* copy the b into temp work space according to permutation */
1351   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1352 
1353   /* forward solve the U^T */
1354   for (i = 0; i < n; i++) {
1355     v  = aa + diag[i];
1356     vi = aj + diag[i] + 1;
1357     nz = ai[i + 1] - diag[i] - 1;
1358     s1 = tmp[i];
1359     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1360     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1361     tmp[i] = s1;
1362   }
1363 
1364   /* backward solve the L^T */
1365   for (i = n - 1; i >= 0; i--) {
1366     v  = aa + diag[i] - 1;
1367     vi = aj + diag[i] - 1;
1368     nz = diag[i] - ai[i];
1369     s1 = tmp[i];
1370     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1371   }
1372 
1373   /* copy tmp into x according to permutation */
1374   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1375 
1376   PetscCall(ISRestoreIndices(isrow, &rout));
1377   PetscCall(ISRestoreIndices(iscol, &cout));
1378   PetscCall(VecRestoreArrayRead(bb, &b));
1379   PetscCall(VecRestoreArrayWrite(xx, &x));
1380 
1381   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) {
1386   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1387   IS                 iscol = a->col, isrow = a->row;
1388   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1389   PetscInt           i, n = A->rmap->n, j;
1390   PetscInt           nz;
1391   PetscScalar       *x, *tmp, s1;
1392   const MatScalar   *aa = a->a, *v;
1393   const PetscScalar *b;
1394 
1395   PetscFunctionBegin;
1396   PetscCall(VecGetArrayRead(bb, &b));
1397   PetscCall(VecGetArrayWrite(xx, &x));
1398   tmp = a->solve_work;
1399 
1400   PetscCall(ISGetIndices(isrow, &rout));
1401   r = rout;
1402   PetscCall(ISGetIndices(iscol, &cout));
1403   c = cout;
1404 
1405   /* copy the b into temp work space according to permutation */
1406   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1407 
1408   /* forward solve the U^T */
1409   for (i = 0; i < n; i++) {
1410     v  = aa + adiag[i + 1] + 1;
1411     vi = aj + adiag[i + 1] + 1;
1412     nz = adiag[i] - adiag[i + 1] - 1;
1413     s1 = tmp[i];
1414     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1415     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1416     tmp[i] = s1;
1417   }
1418 
1419   /* backward solve the L^T */
1420   for (i = n - 1; i >= 0; i--) {
1421     v  = aa + ai[i];
1422     vi = aj + ai[i];
1423     nz = ai[i + 1] - ai[i];
1424     s1 = tmp[i];
1425     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1426   }
1427 
1428   /* copy tmp into x according to permutation */
1429   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1430 
1431   PetscCall(ISRestoreIndices(isrow, &rout));
1432   PetscCall(ISRestoreIndices(iscol, &cout));
1433   PetscCall(VecRestoreArrayRead(bb, &b));
1434   PetscCall(VecRestoreArrayWrite(xx, &x));
1435 
1436   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) {
1441   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1442   IS                 iscol = a->col, isrow = a->row;
1443   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1444   PetscInt           i, n = A->rmap->n, j;
1445   PetscInt           nz;
1446   PetscScalar       *x, *tmp, s1;
1447   const MatScalar   *aa = a->a, *v;
1448   const PetscScalar *b;
1449 
1450   PetscFunctionBegin;
1451   if (zz != xx) PetscCall(VecCopy(zz, xx));
1452   PetscCall(VecGetArrayRead(bb, &b));
1453   PetscCall(VecGetArray(xx, &x));
1454   tmp = a->solve_work;
1455 
1456   PetscCall(ISGetIndices(isrow, &rout));
1457   r = rout;
1458   PetscCall(ISGetIndices(iscol, &cout));
1459   c = cout;
1460 
1461   /* copy the b into temp work space according to permutation */
1462   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1463 
1464   /* forward solve the U^T */
1465   for (i = 0; i < n; i++) {
1466     v  = aa + diag[i];
1467     vi = aj + diag[i] + 1;
1468     nz = ai[i + 1] - diag[i] - 1;
1469     s1 = tmp[i];
1470     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1471     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1472     tmp[i] = s1;
1473   }
1474 
1475   /* backward solve the L^T */
1476   for (i = n - 1; i >= 0; i--) {
1477     v  = aa + diag[i] - 1;
1478     vi = aj + diag[i] - 1;
1479     nz = diag[i] - ai[i];
1480     s1 = tmp[i];
1481     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1482   }
1483 
1484   /* copy tmp into x according to permutation */
1485   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1486 
1487   PetscCall(ISRestoreIndices(isrow, &rout));
1488   PetscCall(ISRestoreIndices(iscol, &cout));
1489   PetscCall(VecRestoreArrayRead(bb, &b));
1490   PetscCall(VecRestoreArray(xx, &x));
1491 
1492   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) {
1497   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1498   IS                 iscol = a->col, isrow = a->row;
1499   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1500   PetscInt           i, n = A->rmap->n, j;
1501   PetscInt           nz;
1502   PetscScalar       *x, *tmp, s1;
1503   const MatScalar   *aa = a->a, *v;
1504   const PetscScalar *b;
1505 
1506   PetscFunctionBegin;
1507   if (zz != xx) PetscCall(VecCopy(zz, xx));
1508   PetscCall(VecGetArrayRead(bb, &b));
1509   PetscCall(VecGetArray(xx, &x));
1510   tmp = a->solve_work;
1511 
1512   PetscCall(ISGetIndices(isrow, &rout));
1513   r = rout;
1514   PetscCall(ISGetIndices(iscol, &cout));
1515   c = cout;
1516 
1517   /* copy the b into temp work space according to permutation */
1518   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1519 
1520   /* forward solve the U^T */
1521   for (i = 0; i < n; i++) {
1522     v  = aa + adiag[i + 1] + 1;
1523     vi = aj + adiag[i + 1] + 1;
1524     nz = adiag[i] - adiag[i + 1] - 1;
1525     s1 = tmp[i];
1526     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1527     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1528     tmp[i] = s1;
1529   }
1530 
1531   /* backward solve the L^T */
1532   for (i = n - 1; i >= 0; i--) {
1533     v  = aa + ai[i];
1534     vi = aj + ai[i];
1535     nz = ai[i + 1] - ai[i];
1536     s1 = tmp[i];
1537     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1538   }
1539 
1540   /* copy tmp into x according to permutation */
1541   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1542 
1543   PetscCall(ISRestoreIndices(isrow, &rout));
1544   PetscCall(ISRestoreIndices(iscol, &cout));
1545   PetscCall(VecRestoreArrayRead(bb, &b));
1546   PetscCall(VecRestoreArray(xx, &x));
1547 
1548   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 /* ----------------------------------------------------------------*/
1553 
1554 /*
1555    ilu() under revised new data structure.
1556    Factored arrays bj and ba are stored as
1557      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1558 
1559    bi=fact->i is an array of size n+1, in which
1560      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1561      bi[n]:  points to L(n-1,n-1)+1
1562 
1563   bdiag=fact->diag is an array of size n+1,in which
1564      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1565      bdiag[n]: points to entry of U(n-1,0)-1
1566 
1567    U(i,:) contains bdiag[i] as its last entry, i.e.,
1568     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1569 */
1570 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1571   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1572   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1573   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1574   IS             isicol;
1575 
1576   PetscFunctionBegin;
1577   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1578   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1579   b = (Mat_SeqAIJ *)(fact)->data;
1580 
1581   /* allocate matrix arrays for new data structure */
1582   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1583   PetscCall(PetscLogObjectMemory((PetscObject)fact, ai[n] * (sizeof(PetscScalar) + sizeof(PetscInt)) + (n + 1) * sizeof(PetscInt)));
1584 
1585   b->singlemalloc = PETSC_TRUE;
1586   if (!b->diag) {
1587     PetscCall(PetscMalloc1(n + 1, &b->diag));
1588     PetscCall(PetscLogObjectMemory((PetscObject)fact, (n + 1) * sizeof(PetscInt)));
1589   }
1590   bdiag = b->diag;
1591 
1592   if (n > 0) { PetscCall(PetscArrayzero(b->a, ai[n])); }
1593 
1594   /* set bi and bj with new data structure */
1595   bi = b->i;
1596   bj = b->j;
1597 
1598   /* L part */
1599   bi[0] = 0;
1600   for (i = 0; i < n; i++) {
1601     nz        = adiag[i] - ai[i];
1602     bi[i + 1] = bi[i] + nz;
1603     aj        = a->j + ai[i];
1604     for (j = 0; j < nz; j++) { bj[k++] = aj[j]; }
1605   }
1606 
1607   /* U part */
1608   bdiag[n] = bi[n] - 1;
1609   for (i = n - 1; i >= 0; i--) {
1610     nz = ai[i + 1] - adiag[i] - 1;
1611     aj = a->j + adiag[i] + 1;
1612     for (j = 0; j < nz; j++) { bj[k++] = aj[j]; }
1613     /* diag[i] */
1614     bj[k++]  = i;
1615     bdiag[i] = bdiag[i + 1] + nz + 1;
1616   }
1617 
1618   fact->factortype             = MAT_FACTOR_ILU;
1619   fact->info.factor_mallocs    = 0;
1620   fact->info.fill_ratio_given  = info->fill;
1621   fact->info.fill_ratio_needed = 1.0;
1622   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1623   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1624 
1625   b       = (Mat_SeqAIJ *)(fact)->data;
1626   b->row  = isrow;
1627   b->col  = iscol;
1628   b->icol = isicol;
1629   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1630   PetscCall(PetscObjectReference((PetscObject)isrow));
1631   PetscCall(PetscObjectReference((PetscObject)iscol));
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1636   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1637   IS                 isicol;
1638   const PetscInt    *r, *ic;
1639   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1640   PetscInt          *bi, *cols, nnz, *cols_lvl;
1641   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1642   PetscInt           i, levels, diagonal_fill;
1643   PetscBool          col_identity, row_identity, missing;
1644   PetscReal          f;
1645   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1646   PetscBT            lnkbt;
1647   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1648   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1649   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1650 
1651   PetscFunctionBegin;
1652   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1653   PetscCall(MatMissingDiagonal(A, &missing, &i));
1654   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1655 
1656   levels = (PetscInt)info->levels;
1657   PetscCall(ISIdentity(isrow, &row_identity));
1658   PetscCall(ISIdentity(iscol, &col_identity));
1659   if (!levels && row_identity && col_identity) {
1660     /* special case: ilu(0) with natural ordering */
1661     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1662     if (a->inode.size) { fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; }
1663     PetscFunctionReturn(0);
1664   }
1665 
1666   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1667   PetscCall(ISGetIndices(isrow, &r));
1668   PetscCall(ISGetIndices(isicol, &ic));
1669 
1670   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1671   PetscCall(PetscMalloc1(n + 1, &bi));
1672   PetscCall(PetscMalloc1(n + 1, &bdiag));
1673   bi[0] = bdiag[0] = 0;
1674   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1675 
1676   /* create a linked list for storing column indices of the active row */
1677   nlnk = n + 1;
1678   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1679 
1680   /* initial FreeSpace size is f*(ai[n]+1) */
1681   f             = info->fill;
1682   diagonal_fill = (PetscInt)info->diagonal_fill;
1683   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1684   current_space = free_space;
1685   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1686   current_space_lvl = free_space_lvl;
1687   for (i = 0; i < n; i++) {
1688     nzi = 0;
1689     /* copy current row into linked list */
1690     nnz = ai[r[i] + 1] - ai[r[i]];
1691     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1692     cols   = aj + ai[r[i]];
1693     lnk[i] = -1; /* marker to indicate if diagonal exists */
1694     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1695     nzi += nlnk;
1696 
1697     /* make sure diagonal entry is included */
1698     if (diagonal_fill && lnk[i] == -1) {
1699       fm = n;
1700       while (lnk[fm] < i) fm = lnk[fm];
1701       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1702       lnk[fm]    = i;
1703       lnk_lvl[i] = 0;
1704       nzi++;
1705       dcount++;
1706     }
1707 
1708     /* add pivot rows into the active row */
1709     nzbd = 0;
1710     prow = lnk[n];
1711     while (prow < i) {
1712       nnz      = bdiag[prow];
1713       cols     = bj_ptr[prow] + nnz + 1;
1714       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1715       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1716       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1717       nzi += nlnk;
1718       prow = lnk[prow];
1719       nzbd++;
1720     }
1721     bdiag[i]  = nzbd;
1722     bi[i + 1] = bi[i] + nzi;
1723     /* if free space is not available, make more free space */
1724     if (current_space->local_remaining < nzi) {
1725       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1726       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1727       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1728       reallocs++;
1729     }
1730 
1731     /* copy data into free_space and free_space_lvl, then initialize lnk */
1732     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1733     bj_ptr[i]    = current_space->array;
1734     bjlvl_ptr[i] = current_space_lvl->array;
1735 
1736     /* make sure the active row i has diagonal entry */
1737     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1738 
1739     current_space->array += nzi;
1740     current_space->local_used += nzi;
1741     current_space->local_remaining -= nzi;
1742     current_space_lvl->array += nzi;
1743     current_space_lvl->local_used += nzi;
1744     current_space_lvl->local_remaining -= nzi;
1745   }
1746 
1747   PetscCall(ISRestoreIndices(isrow, &r));
1748   PetscCall(ISRestoreIndices(isicol, &ic));
1749   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1750   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1751   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1752 
1753   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1754   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1755   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1756 
1757 #if defined(PETSC_USE_INFO)
1758   {
1759     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1760     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1761     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1762     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1763     PetscCall(PetscInfo(A, "for best performance.\n"));
1764     if (diagonal_fill) { PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); }
1765   }
1766 #endif
1767   /* put together the new matrix */
1768   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1769   PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol));
1770   b = (Mat_SeqAIJ *)(fact)->data;
1771 
1772   b->free_a       = PETSC_TRUE;
1773   b->free_ij      = PETSC_TRUE;
1774   b->singlemalloc = PETSC_FALSE;
1775 
1776   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1777 
1778   b->j    = bj;
1779   b->i    = bi;
1780   b->diag = bdiag;
1781   b->ilen = NULL;
1782   b->imax = NULL;
1783   b->row  = isrow;
1784   b->col  = iscol;
1785   PetscCall(PetscObjectReference((PetscObject)isrow));
1786   PetscCall(PetscObjectReference((PetscObject)iscol));
1787   b->icol = isicol;
1788 
1789   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1790   /* In b structure:  Free imax, ilen, old a, old j.
1791      Allocate bdiag, solve_work, new a, new j */
1792   PetscCall(PetscLogObjectMemory((PetscObject)fact, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar))));
1793   b->maxnz = b->nz = bdiag[0] + 1;
1794 
1795   (fact)->info.factor_mallocs    = reallocs;
1796   (fact)->info.fill_ratio_given  = f;
1797   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1798   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1799   if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; }
1800   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
1805   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1806   IS                 isicol;
1807   const PetscInt    *r, *ic;
1808   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1809   PetscInt          *bi, *cols, nnz, *cols_lvl;
1810   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1811   PetscInt           i, levels, diagonal_fill;
1812   PetscBool          col_identity, row_identity;
1813   PetscReal          f;
1814   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1815   PetscBT            lnkbt;
1816   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1817   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1818   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1819   PetscBool          missing;
1820 
1821   PetscFunctionBegin;
1822   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1823   PetscCall(MatMissingDiagonal(A, &missing, &i));
1824   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1825 
1826   f             = info->fill;
1827   levels        = (PetscInt)info->levels;
1828   diagonal_fill = (PetscInt)info->diagonal_fill;
1829 
1830   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1831 
1832   PetscCall(ISIdentity(isrow, &row_identity));
1833   PetscCall(ISIdentity(iscol, &col_identity));
1834   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1835     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1836 
1837     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1838     if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; }
1839     fact->factortype               = MAT_FACTOR_ILU;
1840     (fact)->info.factor_mallocs    = 0;
1841     (fact)->info.fill_ratio_given  = info->fill;
1842     (fact)->info.fill_ratio_needed = 1.0;
1843 
1844     b       = (Mat_SeqAIJ *)(fact)->data;
1845     b->row  = isrow;
1846     b->col  = iscol;
1847     b->icol = isicol;
1848     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1849     PetscCall(PetscObjectReference((PetscObject)isrow));
1850     PetscCall(PetscObjectReference((PetscObject)iscol));
1851     PetscFunctionReturn(0);
1852   }
1853 
1854   PetscCall(ISGetIndices(isrow, &r));
1855   PetscCall(ISGetIndices(isicol, &ic));
1856 
1857   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1858   PetscCall(PetscMalloc1(n + 1, &bi));
1859   PetscCall(PetscMalloc1(n + 1, &bdiag));
1860   bi[0] = bdiag[0] = 0;
1861 
1862   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1863 
1864   /* create a linked list for storing column indices of the active row */
1865   nlnk = n + 1;
1866   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1867 
1868   /* initial FreeSpace size is f*(ai[n]+1) */
1869   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1870   current_space = free_space;
1871   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1872   current_space_lvl = free_space_lvl;
1873 
1874   for (i = 0; i < n; i++) {
1875     nzi = 0;
1876     /* copy current row into linked list */
1877     nnz = ai[r[i] + 1] - ai[r[i]];
1878     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1879     cols   = aj + ai[r[i]];
1880     lnk[i] = -1; /* marker to indicate if diagonal exists */
1881     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1882     nzi += nlnk;
1883 
1884     /* make sure diagonal entry is included */
1885     if (diagonal_fill && lnk[i] == -1) {
1886       fm = n;
1887       while (lnk[fm] < i) fm = lnk[fm];
1888       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1889       lnk[fm]    = i;
1890       lnk_lvl[i] = 0;
1891       nzi++;
1892       dcount++;
1893     }
1894 
1895     /* add pivot rows into the active row */
1896     nzbd = 0;
1897     prow = lnk[n];
1898     while (prow < i) {
1899       nnz      = bdiag[prow];
1900       cols     = bj_ptr[prow] + nnz + 1;
1901       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1902       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1903       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1904       nzi += nlnk;
1905       prow = lnk[prow];
1906       nzbd++;
1907     }
1908     bdiag[i]  = nzbd;
1909     bi[i + 1] = bi[i] + nzi;
1910 
1911     /* if free space is not available, make more free space */
1912     if (current_space->local_remaining < nzi) {
1913       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1914       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1915       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1916       reallocs++;
1917     }
1918 
1919     /* copy data into free_space and free_space_lvl, then initialize lnk */
1920     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1921     bj_ptr[i]    = current_space->array;
1922     bjlvl_ptr[i] = current_space_lvl->array;
1923 
1924     /* make sure the active row i has diagonal entry */
1925     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1926 
1927     current_space->array += nzi;
1928     current_space->local_used += nzi;
1929     current_space->local_remaining -= nzi;
1930     current_space_lvl->array += nzi;
1931     current_space_lvl->local_used += nzi;
1932     current_space_lvl->local_remaining -= nzi;
1933   }
1934 
1935   PetscCall(ISRestoreIndices(isrow, &r));
1936   PetscCall(ISRestoreIndices(isicol, &ic));
1937 
1938   /* destroy list of free space and other temporary arrays */
1939   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1940   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1941   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1942   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1943   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1944 
1945 #if defined(PETSC_USE_INFO)
1946   {
1947     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1948     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1949     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1950     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1951     PetscCall(PetscInfo(A, "for best performance.\n"));
1952     if (diagonal_fill) { PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); }
1953   }
1954 #endif
1955 
1956   /* put together the new matrix */
1957   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1958   PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol));
1959   b = (Mat_SeqAIJ *)(fact)->data;
1960 
1961   b->free_a       = PETSC_TRUE;
1962   b->free_ij      = PETSC_TRUE;
1963   b->singlemalloc = PETSC_FALSE;
1964 
1965   PetscCall(PetscMalloc1(bi[n], &b->a));
1966   b->j = bj;
1967   b->i = bi;
1968   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1969   b->diag = bdiag;
1970   b->ilen = NULL;
1971   b->imax = NULL;
1972   b->row  = isrow;
1973   b->col  = iscol;
1974   PetscCall(PetscObjectReference((PetscObject)isrow));
1975   PetscCall(PetscObjectReference((PetscObject)iscol));
1976   b->icol = isicol;
1977   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1978   /* In b structure:  Free imax, ilen, old a, old j.
1979      Allocate bdiag, solve_work, new a, new j */
1980   PetscCall(PetscLogObjectMemory((PetscObject)fact, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar))));
1981   b->maxnz = b->nz = bi[n];
1982 
1983   (fact)->info.factor_mallocs    = reallocs;
1984   (fact)->info.fill_ratio_given  = f;
1985   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1986   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
1987   if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) {
1992   Mat             C  = B;
1993   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
1994   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
1995   IS              ip = b->row, iip = b->icol;
1996   const PetscInt *rip, *riip;
1997   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1998   PetscInt       *ai = a->i, *aj = a->j;
1999   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2000   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2001   PetscBool       perm_identity;
2002   FactorShiftCtx  sctx;
2003   PetscReal       rs;
2004   MatScalar       d, *v;
2005 
2006   PetscFunctionBegin;
2007   /* MatPivotSetUp(): initialize shift context sctx */
2008   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2009 
2010   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2011     sctx.shift_top = info->zeropivot;
2012     for (i = 0; i < mbs; i++) {
2013       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2014       d  = (aa)[a->diag[i]];
2015       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2016       v  = aa + ai[i];
2017       nz = ai[i + 1] - ai[i];
2018       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2019       if (rs > sctx.shift_top) sctx.shift_top = rs;
2020     }
2021     sctx.shift_top *= 1.1;
2022     sctx.nshift_max = 5;
2023     sctx.shift_lo   = 0.;
2024     sctx.shift_hi   = 1.;
2025   }
2026 
2027   PetscCall(ISGetIndices(ip, &rip));
2028   PetscCall(ISGetIndices(iip, &riip));
2029 
2030   /* allocate working arrays
2031      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2032      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2033   */
2034   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2035 
2036   do {
2037     sctx.newshift = PETSC_FALSE;
2038 
2039     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2040     if (mbs) il[0] = 0;
2041 
2042     for (k = 0; k < mbs; k++) {
2043       /* zero rtmp */
2044       nz    = bi[k + 1] - bi[k];
2045       bjtmp = bj + bi[k];
2046       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2047 
2048       /* load in initial unfactored row */
2049       bval = ba + bi[k];
2050       jmin = ai[rip[k]];
2051       jmax = ai[rip[k] + 1];
2052       for (j = jmin; j < jmax; j++) {
2053         col = riip[aj[j]];
2054         if (col >= k) { /* only take upper triangular entry */
2055           rtmp[col] = aa[j];
2056           *bval++   = 0.0; /* for in-place factorization */
2057         }
2058       }
2059       /* shift the diagonal of the matrix: ZeropivotApply() */
2060       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2061 
2062       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2063       dk = rtmp[k];
2064       i  = c2r[k]; /* first row to be added to k_th row  */
2065 
2066       while (i < k) {
2067         nexti = c2r[i]; /* next row to be added to k_th row */
2068 
2069         /* compute multiplier, update diag(k) and U(i,k) */
2070         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2071         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2072         dk += uikdi * ba[ili];           /* update diag[k] */
2073         ba[ili] = uikdi;                 /* -U(i,k) */
2074 
2075         /* add multiple of row i to k-th row */
2076         jmin = ili + 1;
2077         jmax = bi[i + 1];
2078         if (jmin < jmax) {
2079           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2080           /* update il and c2r for row i */
2081           il[i]  = jmin;
2082           j      = bj[jmin];
2083           c2r[i] = c2r[j];
2084           c2r[j] = i;
2085         }
2086         i = nexti;
2087       }
2088 
2089       /* copy data into U(k,:) */
2090       rs   = 0.0;
2091       jmin = bi[k];
2092       jmax = bi[k + 1] - 1;
2093       if (jmin < jmax) {
2094         for (j = jmin; j < jmax; j++) {
2095           col   = bj[j];
2096           ba[j] = rtmp[col];
2097           rs += PetscAbsScalar(ba[j]);
2098         }
2099         /* add the k-th row into il and c2r */
2100         il[k]  = jmin;
2101         i      = bj[jmin];
2102         c2r[k] = c2r[i];
2103         c2r[i] = k;
2104       }
2105 
2106       /* MatPivotCheck() */
2107       sctx.rs = rs;
2108       sctx.pv = dk;
2109       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2110       if (sctx.newshift) break;
2111       dk = sctx.pv;
2112 
2113       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2114     }
2115   } while (sctx.newshift);
2116 
2117   PetscCall(PetscFree3(rtmp, il, c2r));
2118   PetscCall(ISRestoreIndices(ip, &rip));
2119   PetscCall(ISRestoreIndices(iip, &riip));
2120 
2121   PetscCall(ISIdentity(ip, &perm_identity));
2122   if (perm_identity) {
2123     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2124     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2125     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2126     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2127   } else {
2128     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2129     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2130     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2131     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2132   }
2133 
2134   C->assembled    = PETSC_TRUE;
2135   C->preallocated = PETSC_TRUE;
2136 
2137   PetscCall(PetscLogFlops(C->rmap->n));
2138 
2139   /* MatPivotView() */
2140   if (sctx.nshift) {
2141     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2142       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2143     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2144       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2145     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2146       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2147     }
2148   }
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) {
2153   Mat             C  = B;
2154   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2155   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2156   IS              ip = b->row, iip = b->icol;
2157   const PetscInt *rip, *riip;
2158   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2159   PetscInt       *ai = a->i, *aj = a->j;
2160   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2161   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2162   PetscBool       perm_identity;
2163   FactorShiftCtx  sctx;
2164   PetscReal       rs;
2165   MatScalar       d, *v;
2166 
2167   PetscFunctionBegin;
2168   /* MatPivotSetUp(): initialize shift context sctx */
2169   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2170 
2171   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2172     sctx.shift_top = info->zeropivot;
2173     for (i = 0; i < mbs; i++) {
2174       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2175       d  = (aa)[a->diag[i]];
2176       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2177       v  = aa + ai[i];
2178       nz = ai[i + 1] - ai[i];
2179       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2180       if (rs > sctx.shift_top) sctx.shift_top = rs;
2181     }
2182     sctx.shift_top *= 1.1;
2183     sctx.nshift_max = 5;
2184     sctx.shift_lo   = 0.;
2185     sctx.shift_hi   = 1.;
2186   }
2187 
2188   PetscCall(ISGetIndices(ip, &rip));
2189   PetscCall(ISGetIndices(iip, &riip));
2190 
2191   /* initialization */
2192   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2193 
2194   do {
2195     sctx.newshift = PETSC_FALSE;
2196 
2197     for (i = 0; i < mbs; i++) jl[i] = mbs;
2198     il[0] = 0;
2199 
2200     for (k = 0; k < mbs; k++) {
2201       /* zero rtmp */
2202       nz    = bi[k + 1] - bi[k];
2203       bjtmp = bj + bi[k];
2204       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2205 
2206       bval = ba + bi[k];
2207       /* initialize k-th row by the perm[k]-th row of A */
2208       jmin = ai[rip[k]];
2209       jmax = ai[rip[k] + 1];
2210       for (j = jmin; j < jmax; j++) {
2211         col = riip[aj[j]];
2212         if (col >= k) { /* only take upper triangular entry */
2213           rtmp[col] = aa[j];
2214           *bval++   = 0.0; /* for in-place factorization */
2215         }
2216       }
2217       /* shift the diagonal of the matrix */
2218       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2219 
2220       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2221       dk = rtmp[k];
2222       i  = jl[k]; /* first row to be added to k_th row  */
2223 
2224       while (i < k) {
2225         nexti = jl[i]; /* next row to be added to k_th row */
2226 
2227         /* compute multiplier, update diag(k) and U(i,k) */
2228         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2229         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2230         dk += uikdi * ba[ili];
2231         ba[ili] = uikdi; /* -U(i,k) */
2232 
2233         /* add multiple of row i to k-th row */
2234         jmin = ili + 1;
2235         jmax = bi[i + 1];
2236         if (jmin < jmax) {
2237           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2238           /* update il and jl for row i */
2239           il[i] = jmin;
2240           j     = bj[jmin];
2241           jl[i] = jl[j];
2242           jl[j] = i;
2243         }
2244         i = nexti;
2245       }
2246 
2247       /* shift the diagonals when zero pivot is detected */
2248       /* compute rs=sum of abs(off-diagonal) */
2249       rs   = 0.0;
2250       jmin = bi[k] + 1;
2251       nz   = bi[k + 1] - jmin;
2252       bcol = bj + jmin;
2253       for (j = 0; j < nz; j++) { rs += PetscAbsScalar(rtmp[bcol[j]]); }
2254 
2255       sctx.rs = rs;
2256       sctx.pv = dk;
2257       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2258       if (sctx.newshift) break;
2259       dk = sctx.pv;
2260 
2261       /* copy data into U(k,:) */
2262       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2263       jmin      = bi[k] + 1;
2264       jmax      = bi[k + 1];
2265       if (jmin < jmax) {
2266         for (j = jmin; j < jmax; j++) {
2267           col   = bj[j];
2268           ba[j] = rtmp[col];
2269         }
2270         /* add the k-th row into il and jl */
2271         il[k] = jmin;
2272         i     = bj[jmin];
2273         jl[k] = jl[i];
2274         jl[i] = k;
2275       }
2276     }
2277   } while (sctx.newshift);
2278 
2279   PetscCall(PetscFree3(rtmp, il, jl));
2280   PetscCall(ISRestoreIndices(ip, &rip));
2281   PetscCall(ISRestoreIndices(iip, &riip));
2282 
2283   PetscCall(ISIdentity(ip, &perm_identity));
2284   if (perm_identity) {
2285     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2286     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2287     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2288     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2289   } else {
2290     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2291     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2292     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2293     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2294   }
2295 
2296   C->assembled    = PETSC_TRUE;
2297   C->preallocated = PETSC_TRUE;
2298 
2299   PetscCall(PetscLogFlops(C->rmap->n));
2300   if (sctx.nshift) {
2301     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2302       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2303     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2304       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2305     }
2306   }
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 /*
2311    icc() under revised new data structure.
2312    Factored arrays bj and ba are stored as
2313      U(0,:),...,U(i,:),U(n-1,:)
2314 
2315    ui=fact->i is an array of size n+1, in which
2316    ui+
2317      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2318      ui[n]:  points to U(n-1,n-1)+1
2319 
2320   udiag=fact->diag is an array of size n,in which
2321      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2322 
2323    U(i,:) contains udiag[i] as its last entry, i.e.,
2324     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2325 */
2326 
2327 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2328   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2329   Mat_SeqSBAIJ      *b;
2330   PetscBool          perm_identity, missing;
2331   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2332   const PetscInt    *rip, *riip;
2333   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2334   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2335   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2336   PetscReal          fill = info->fill, levels = info->levels;
2337   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2338   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2339   PetscBT            lnkbt;
2340   IS                 iperm;
2341 
2342   PetscFunctionBegin;
2343   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2344   PetscCall(MatMissingDiagonal(A, &missing, &d));
2345   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2346   PetscCall(ISIdentity(perm, &perm_identity));
2347   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2348 
2349   PetscCall(PetscMalloc1(am + 1, &ui));
2350   PetscCall(PetscMalloc1(am + 1, &udiag));
2351   ui[0] = 0;
2352 
2353   /* ICC(0) without matrix ordering: simply rearrange column indices */
2354   if (!levels && perm_identity) {
2355     for (i = 0; i < am; i++) {
2356       ncols     = ai[i + 1] - a->diag[i];
2357       ui[i + 1] = ui[i] + ncols;
2358       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2359     }
2360     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2361     cols = uj;
2362     for (i = 0; i < am; i++) {
2363       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2364       ncols = ai[i + 1] - a->diag[i] - 1;
2365       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2366       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2367     }
2368   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2369     PetscCall(ISGetIndices(iperm, &riip));
2370     PetscCall(ISGetIndices(perm, &rip));
2371 
2372     /* initialization */
2373     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2374 
2375     /* jl: linked list for storing indices of the pivot rows
2376        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2377     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2378     for (i = 0; i < am; i++) {
2379       jl[i] = am;
2380       il[i] = 0;
2381     }
2382 
2383     /* create and initialize a linked list for storing column indices of the active row k */
2384     nlnk = am + 1;
2385     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2386 
2387     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2388     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2389     current_space = free_space;
2390     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2391     current_space_lvl = free_space_lvl;
2392 
2393     for (k = 0; k < am; k++) { /* for each active row k */
2394       /* initialize lnk by the column indices of row rip[k] of A */
2395       nzk   = 0;
2396       ncols = ai[rip[k] + 1] - ai[rip[k]];
2397       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2398       ncols_upper = 0;
2399       for (j = 0; j < ncols; j++) {
2400         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2401         if (riip[i] >= k) {         /* only take upper triangular entry */
2402           ajtmp[ncols_upper] = i;
2403           ncols_upper++;
2404         }
2405       }
2406       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2407       nzk += nlnk;
2408 
2409       /* update lnk by computing fill-in for each pivot row to be merged in */
2410       prow = jl[k]; /* 1st pivot row */
2411 
2412       while (prow < k) {
2413         nextprow = jl[prow];
2414 
2415         /* merge prow into k-th row */
2416         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2417         jmax  = ui[prow + 1];
2418         ncols = jmax - jmin;
2419         i     = jmin - ui[prow];
2420         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2421         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2422         j     = *(uj - 1);
2423         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2424         nzk += nlnk;
2425 
2426         /* update il and jl for prow */
2427         if (jmin < jmax) {
2428           il[prow] = jmin;
2429           j        = *cols;
2430           jl[prow] = jl[j];
2431           jl[j]    = prow;
2432         }
2433         prow = nextprow;
2434       }
2435 
2436       /* if free space is not available, make more free space */
2437       if (current_space->local_remaining < nzk) {
2438         i = am - k + 1;                                    /* num of unfactored rows */
2439         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2440         PetscCall(PetscFreeSpaceGet(i, &current_space));
2441         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2442         reallocs++;
2443       }
2444 
2445       /* copy data into free_space and free_space_lvl, then initialize lnk */
2446       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2447       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2448 
2449       /* add the k-th row into il and jl */
2450       if (nzk > 1) {
2451         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2452         jl[k] = jl[i];
2453         jl[i] = k;
2454         il[k] = ui[k] + 1;
2455       }
2456       uj_ptr[k]     = current_space->array;
2457       uj_lvl_ptr[k] = current_space_lvl->array;
2458 
2459       current_space->array += nzk;
2460       current_space->local_used += nzk;
2461       current_space->local_remaining -= nzk;
2462 
2463       current_space_lvl->array += nzk;
2464       current_space_lvl->local_used += nzk;
2465       current_space_lvl->local_remaining -= nzk;
2466 
2467       ui[k + 1] = ui[k] + nzk;
2468     }
2469 
2470     PetscCall(ISRestoreIndices(perm, &rip));
2471     PetscCall(ISRestoreIndices(iperm, &riip));
2472     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2473     PetscCall(PetscFree(ajtmp));
2474 
2475     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2476     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2477     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2478     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2479     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2480 
2481   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2482 
2483   /* put together the new matrix in MATSEQSBAIJ format */
2484   b               = (Mat_SeqSBAIJ *)(fact)->data;
2485   b->singlemalloc = PETSC_FALSE;
2486 
2487   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2488 
2489   b->j         = uj;
2490   b->i         = ui;
2491   b->diag      = udiag;
2492   b->free_diag = PETSC_TRUE;
2493   b->ilen      = NULL;
2494   b->imax      = NULL;
2495   b->row       = perm;
2496   b->col       = perm;
2497   PetscCall(PetscObjectReference((PetscObject)perm));
2498   PetscCall(PetscObjectReference((PetscObject)perm));
2499   b->icol          = iperm;
2500   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2501 
2502   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2503   PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar))));
2504 
2505   b->maxnz = b->nz = ui[am];
2506   b->free_a        = PETSC_TRUE;
2507   b->free_ij       = PETSC_TRUE;
2508 
2509   fact->info.factor_mallocs   = reallocs;
2510   fact->info.fill_ratio_given = fill;
2511   if (ai[am] != 0) {
2512     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2513     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2514   } else {
2515     fact->info.fill_ratio_needed = 0.0;
2516   }
2517 #if defined(PETSC_USE_INFO)
2518   if (ai[am] != 0) {
2519     PetscReal af = fact->info.fill_ratio_needed;
2520     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2521     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2522     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2523   } else {
2524     PetscCall(PetscInfo(A, "Empty matrix\n"));
2525   }
2526 #endif
2527   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2532   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2533   Mat_SeqSBAIJ      *b;
2534   PetscBool          perm_identity, missing;
2535   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2536   const PetscInt    *rip, *riip;
2537   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2538   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2539   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2540   PetscReal          fill = info->fill, levels = info->levels;
2541   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2542   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2543   PetscBT            lnkbt;
2544   IS                 iperm;
2545 
2546   PetscFunctionBegin;
2547   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2548   PetscCall(MatMissingDiagonal(A, &missing, &d));
2549   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2550   PetscCall(ISIdentity(perm, &perm_identity));
2551   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2552 
2553   PetscCall(PetscMalloc1(am + 1, &ui));
2554   PetscCall(PetscMalloc1(am + 1, &udiag));
2555   ui[0] = 0;
2556 
2557   /* ICC(0) without matrix ordering: simply copies fill pattern */
2558   if (!levels && perm_identity) {
2559     for (i = 0; i < am; i++) {
2560       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2561       udiag[i]  = ui[i];
2562     }
2563     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2564     cols = uj;
2565     for (i = 0; i < am; i++) {
2566       aj    = a->j + a->diag[i];
2567       ncols = ui[i + 1] - ui[i];
2568       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2569     }
2570   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2571     PetscCall(ISGetIndices(iperm, &riip));
2572     PetscCall(ISGetIndices(perm, &rip));
2573 
2574     /* initialization */
2575     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2576 
2577     /* jl: linked list for storing indices of the pivot rows
2578        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2579     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2580     for (i = 0; i < am; i++) {
2581       jl[i] = am;
2582       il[i] = 0;
2583     }
2584 
2585     /* create and initialize a linked list for storing column indices of the active row k */
2586     nlnk = am + 1;
2587     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2588 
2589     /* initial FreeSpace size is fill*(ai[am]+1) */
2590     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2591     current_space = free_space;
2592     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2593     current_space_lvl = free_space_lvl;
2594 
2595     for (k = 0; k < am; k++) { /* for each active row k */
2596       /* initialize lnk by the column indices of row rip[k] of A */
2597       nzk   = 0;
2598       ncols = ai[rip[k] + 1] - ai[rip[k]];
2599       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2600       ncols_upper = 0;
2601       for (j = 0; j < ncols; j++) {
2602         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2603         if (riip[i] >= k) {         /* only take upper triangular entry */
2604           ajtmp[ncols_upper] = i;
2605           ncols_upper++;
2606         }
2607       }
2608       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2609       nzk += nlnk;
2610 
2611       /* update lnk by computing fill-in for each pivot row to be merged in */
2612       prow = jl[k]; /* 1st pivot row */
2613 
2614       while (prow < k) {
2615         nextprow = jl[prow];
2616 
2617         /* merge prow into k-th row */
2618         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2619         jmax  = ui[prow + 1];
2620         ncols = jmax - jmin;
2621         i     = jmin - ui[prow];
2622         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2623         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2624         j     = *(uj - 1);
2625         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2626         nzk += nlnk;
2627 
2628         /* update il and jl for prow */
2629         if (jmin < jmax) {
2630           il[prow] = jmin;
2631           j        = *cols;
2632           jl[prow] = jl[j];
2633           jl[j]    = prow;
2634         }
2635         prow = nextprow;
2636       }
2637 
2638       /* if free space is not available, make more free space */
2639       if (current_space->local_remaining < nzk) {
2640         i = am - k + 1;                                    /* num of unfactored rows */
2641         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2642         PetscCall(PetscFreeSpaceGet(i, &current_space));
2643         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2644         reallocs++;
2645       }
2646 
2647       /* copy data into free_space and free_space_lvl, then initialize lnk */
2648       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2649       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2650 
2651       /* add the k-th row into il and jl */
2652       if (nzk > 1) {
2653         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2654         jl[k] = jl[i];
2655         jl[i] = k;
2656         il[k] = ui[k] + 1;
2657       }
2658       uj_ptr[k]     = current_space->array;
2659       uj_lvl_ptr[k] = current_space_lvl->array;
2660 
2661       current_space->array += nzk;
2662       current_space->local_used += nzk;
2663       current_space->local_remaining -= nzk;
2664 
2665       current_space_lvl->array += nzk;
2666       current_space_lvl->local_used += nzk;
2667       current_space_lvl->local_remaining -= nzk;
2668 
2669       ui[k + 1] = ui[k] + nzk;
2670     }
2671 
2672 #if defined(PETSC_USE_INFO)
2673     if (ai[am] != 0) {
2674       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2675       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2676       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2677       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2678     } else {
2679       PetscCall(PetscInfo(A, "Empty matrix\n"));
2680     }
2681 #endif
2682 
2683     PetscCall(ISRestoreIndices(perm, &rip));
2684     PetscCall(ISRestoreIndices(iperm, &riip));
2685     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2686     PetscCall(PetscFree(ajtmp));
2687 
2688     /* destroy list of free space and other temporary array(s) */
2689     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2690     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2691     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2692     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2693 
2694   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2695 
2696   /* put together the new matrix in MATSEQSBAIJ format */
2697 
2698   b               = (Mat_SeqSBAIJ *)fact->data;
2699   b->singlemalloc = PETSC_FALSE;
2700 
2701   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2702 
2703   b->j         = uj;
2704   b->i         = ui;
2705   b->diag      = udiag;
2706   b->free_diag = PETSC_TRUE;
2707   b->ilen      = NULL;
2708   b->imax      = NULL;
2709   b->row       = perm;
2710   b->col       = perm;
2711 
2712   PetscCall(PetscObjectReference((PetscObject)perm));
2713   PetscCall(PetscObjectReference((PetscObject)perm));
2714 
2715   b->icol          = iperm;
2716   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2717   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2718   PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar))));
2719   b->maxnz = b->nz = ui[am];
2720   b->free_a        = PETSC_TRUE;
2721   b->free_ij       = PETSC_TRUE;
2722 
2723   fact->info.factor_mallocs   = reallocs;
2724   fact->info.fill_ratio_given = fill;
2725   if (ai[am] != 0) {
2726     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2727   } else {
2728     fact->info.fill_ratio_needed = 0.0;
2729   }
2730   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2735   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2736   Mat_SeqSBAIJ      *b;
2737   PetscBool          perm_identity, missing;
2738   PetscReal          fill = info->fill;
2739   const PetscInt    *rip, *riip;
2740   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2741   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2742   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2743   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2744   PetscBT            lnkbt;
2745   IS                 iperm;
2746 
2747   PetscFunctionBegin;
2748   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2749   PetscCall(MatMissingDiagonal(A, &missing, &i));
2750   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2751 
2752   /* check whether perm is the identity mapping */
2753   PetscCall(ISIdentity(perm, &perm_identity));
2754   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2755   PetscCall(ISGetIndices(iperm, &riip));
2756   PetscCall(ISGetIndices(perm, &rip));
2757 
2758   /* initialization */
2759   PetscCall(PetscMalloc1(am + 1, &ui));
2760   PetscCall(PetscMalloc1(am + 1, &udiag));
2761   ui[0] = 0;
2762 
2763   /* jl: linked list for storing indices of the pivot rows
2764      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2765   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2766   for (i = 0; i < am; i++) {
2767     jl[i] = am;
2768     il[i] = 0;
2769   }
2770 
2771   /* create and initialize a linked list for storing column indices of the active row k */
2772   nlnk = am + 1;
2773   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2774 
2775   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2776   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2777   current_space = free_space;
2778 
2779   for (k = 0; k < am; k++) { /* for each active row k */
2780     /* initialize lnk by the column indices of row rip[k] of A */
2781     nzk   = 0;
2782     ncols = ai[rip[k] + 1] - ai[rip[k]];
2783     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2784     ncols_upper = 0;
2785     for (j = 0; j < ncols; j++) {
2786       i = riip[*(aj + ai[rip[k]] + j)];
2787       if (i >= k) { /* only take upper triangular entry */
2788         cols[ncols_upper] = i;
2789         ncols_upper++;
2790       }
2791     }
2792     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2793     nzk += nlnk;
2794 
2795     /* update lnk by computing fill-in for each pivot row to be merged in */
2796     prow = jl[k]; /* 1st pivot row */
2797 
2798     while (prow < k) {
2799       nextprow = jl[prow];
2800       /* merge prow into k-th row */
2801       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2802       jmax     = ui[prow + 1];
2803       ncols    = jmax - jmin;
2804       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2805       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2806       nzk += nlnk;
2807 
2808       /* update il and jl for prow */
2809       if (jmin < jmax) {
2810         il[prow] = jmin;
2811         j        = *uj_ptr;
2812         jl[prow] = jl[j];
2813         jl[j]    = prow;
2814       }
2815       prow = nextprow;
2816     }
2817 
2818     /* if free space is not available, make more free space */
2819     if (current_space->local_remaining < nzk) {
2820       i = am - k + 1;                                    /* num of unfactored rows */
2821       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2822       PetscCall(PetscFreeSpaceGet(i, &current_space));
2823       reallocs++;
2824     }
2825 
2826     /* copy data into free space, then initialize lnk */
2827     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2828 
2829     /* add the k-th row into il and jl */
2830     if (nzk > 1) {
2831       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2832       jl[k] = jl[i];
2833       jl[i] = k;
2834       il[k] = ui[k] + 1;
2835     }
2836     ui_ptr[k] = current_space->array;
2837 
2838     current_space->array += nzk;
2839     current_space->local_used += nzk;
2840     current_space->local_remaining -= nzk;
2841 
2842     ui[k + 1] = ui[k] + nzk;
2843   }
2844 
2845   PetscCall(ISRestoreIndices(perm, &rip));
2846   PetscCall(ISRestoreIndices(iperm, &riip));
2847   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2848 
2849   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2850   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2851   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2852   PetscCall(PetscLLDestroy(lnk, lnkbt));
2853 
2854   /* put together the new matrix in MATSEQSBAIJ format */
2855 
2856   b               = (Mat_SeqSBAIJ *)fact->data;
2857   b->singlemalloc = PETSC_FALSE;
2858   b->free_a       = PETSC_TRUE;
2859   b->free_ij      = PETSC_TRUE;
2860 
2861   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2862 
2863   b->j         = uj;
2864   b->i         = ui;
2865   b->diag      = udiag;
2866   b->free_diag = PETSC_TRUE;
2867   b->ilen      = NULL;
2868   b->imax      = NULL;
2869   b->row       = perm;
2870   b->col       = perm;
2871 
2872   PetscCall(PetscObjectReference((PetscObject)perm));
2873   PetscCall(PetscObjectReference((PetscObject)perm));
2874 
2875   b->icol          = iperm;
2876   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2877 
2878   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2879   PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar))));
2880 
2881   b->maxnz = b->nz = ui[am];
2882 
2883   fact->info.factor_mallocs   = reallocs;
2884   fact->info.fill_ratio_given = fill;
2885   if (ai[am] != 0) {
2886     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2887     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2888   } else {
2889     fact->info.fill_ratio_needed = 0.0;
2890   }
2891 #if defined(PETSC_USE_INFO)
2892   if (ai[am] != 0) {
2893     PetscReal af = fact->info.fill_ratio_needed;
2894     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2895     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2896     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2897   } else {
2898     PetscCall(PetscInfo(A, "Empty matrix\n"));
2899   }
2900 #endif
2901   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) {
2906   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2907   Mat_SeqSBAIJ      *b;
2908   PetscBool          perm_identity, missing;
2909   PetscReal          fill = info->fill;
2910   const PetscInt    *rip, *riip;
2911   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2912   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2913   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2914   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2915   PetscBT            lnkbt;
2916   IS                 iperm;
2917 
2918   PetscFunctionBegin;
2919   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2920   PetscCall(MatMissingDiagonal(A, &missing, &i));
2921   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2922 
2923   /* check whether perm is the identity mapping */
2924   PetscCall(ISIdentity(perm, &perm_identity));
2925   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2926   PetscCall(ISGetIndices(iperm, &riip));
2927   PetscCall(ISGetIndices(perm, &rip));
2928 
2929   /* initialization */
2930   PetscCall(PetscMalloc1(am + 1, &ui));
2931   ui[0] = 0;
2932 
2933   /* jl: linked list for storing indices of the pivot rows
2934      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2935   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2936   for (i = 0; i < am; i++) {
2937     jl[i] = am;
2938     il[i] = 0;
2939   }
2940 
2941   /* create and initialize a linked list for storing column indices of the active row k */
2942   nlnk = am + 1;
2943   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2944 
2945   /* initial FreeSpace size is fill*(ai[am]+1) */
2946   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2947   current_space = free_space;
2948 
2949   for (k = 0; k < am; k++) { /* for each active row k */
2950     /* initialize lnk by the column indices of row rip[k] of A */
2951     nzk   = 0;
2952     ncols = ai[rip[k] + 1] - ai[rip[k]];
2953     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2954     ncols_upper = 0;
2955     for (j = 0; j < ncols; j++) {
2956       i = riip[*(aj + ai[rip[k]] + j)];
2957       if (i >= k) { /* only take upper triangular entry */
2958         cols[ncols_upper] = i;
2959         ncols_upper++;
2960       }
2961     }
2962     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2963     nzk += nlnk;
2964 
2965     /* update lnk by computing fill-in for each pivot row to be merged in */
2966     prow = jl[k]; /* 1st pivot row */
2967 
2968     while (prow < k) {
2969       nextprow = jl[prow];
2970       /* merge prow into k-th row */
2971       jmin     = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2972       jmax     = ui[prow + 1];
2973       ncols    = jmax - jmin;
2974       uj_ptr   = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2975       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2976       nzk += nlnk;
2977 
2978       /* update il and jl for prow */
2979       if (jmin < jmax) {
2980         il[prow] = jmin;
2981         j        = *uj_ptr;
2982         jl[prow] = jl[j];
2983         jl[j]    = prow;
2984       }
2985       prow = nextprow;
2986     }
2987 
2988     /* if free space is not available, make more free space */
2989     if (current_space->local_remaining < nzk) {
2990       i = am - k + 1;                     /* num of unfactored rows */
2991       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2992       PetscCall(PetscFreeSpaceGet(i, &current_space));
2993       reallocs++;
2994     }
2995 
2996     /* copy data into free space, then initialize lnk */
2997     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2998 
2999     /* add the k-th row into il and jl */
3000     if (nzk - 1 > 0) {
3001       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3002       jl[k] = jl[i];
3003       jl[i] = k;
3004       il[k] = ui[k] + 1;
3005     }
3006     ui_ptr[k] = current_space->array;
3007 
3008     current_space->array += nzk;
3009     current_space->local_used += nzk;
3010     current_space->local_remaining -= nzk;
3011 
3012     ui[k + 1] = ui[k] + nzk;
3013   }
3014 
3015 #if defined(PETSC_USE_INFO)
3016   if (ai[am] != 0) {
3017     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3018     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3019     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3020     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3021   } else {
3022     PetscCall(PetscInfo(A, "Empty matrix\n"));
3023   }
3024 #endif
3025 
3026   PetscCall(ISRestoreIndices(perm, &rip));
3027   PetscCall(ISRestoreIndices(iperm, &riip));
3028   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3029 
3030   /* destroy list of free space and other temporary array(s) */
3031   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3032   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3033   PetscCall(PetscLLDestroy(lnk, lnkbt));
3034 
3035   /* put together the new matrix in MATSEQSBAIJ format */
3036 
3037   b               = (Mat_SeqSBAIJ *)fact->data;
3038   b->singlemalloc = PETSC_FALSE;
3039   b->free_a       = PETSC_TRUE;
3040   b->free_ij      = PETSC_TRUE;
3041 
3042   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3043 
3044   b->j    = uj;
3045   b->i    = ui;
3046   b->diag = NULL;
3047   b->ilen = NULL;
3048   b->imax = NULL;
3049   b->row  = perm;
3050   b->col  = perm;
3051 
3052   PetscCall(PetscObjectReference((PetscObject)perm));
3053   PetscCall(PetscObjectReference((PetscObject)perm));
3054 
3055   b->icol          = iperm;
3056   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3057 
3058   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3059   PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar))));
3060   b->maxnz = b->nz = ui[am];
3061 
3062   fact->info.factor_mallocs   = reallocs;
3063   fact->info.fill_ratio_given = fill;
3064   if (ai[am] != 0) {
3065     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3066   } else {
3067     fact->info.fill_ratio_needed = 0.0;
3068   }
3069   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) {
3074   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3075   PetscInt           n  = A->rmap->n;
3076   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3077   PetscScalar       *x, sum;
3078   const PetscScalar *b;
3079   const MatScalar   *aa = a->a, *v;
3080   PetscInt           i, nz;
3081 
3082   PetscFunctionBegin;
3083   if (!n) PetscFunctionReturn(0);
3084 
3085   PetscCall(VecGetArrayRead(bb, &b));
3086   PetscCall(VecGetArrayWrite(xx, &x));
3087 
3088   /* forward solve the lower triangular */
3089   x[0] = b[0];
3090   v    = aa;
3091   vi   = aj;
3092   for (i = 1; i < n; i++) {
3093     nz  = ai[i + 1] - ai[i];
3094     sum = b[i];
3095     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3096     v += nz;
3097     vi += nz;
3098     x[i] = sum;
3099   }
3100 
3101   /* backward solve the upper triangular */
3102   for (i = n - 1; i >= 0; i--) {
3103     v   = aa + adiag[i + 1] + 1;
3104     vi  = aj + adiag[i + 1] + 1;
3105     nz  = adiag[i] - adiag[i + 1] - 1;
3106     sum = x[i];
3107     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3108     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3109   }
3110 
3111   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3112   PetscCall(VecRestoreArrayRead(bb, &b));
3113   PetscCall(VecRestoreArrayWrite(xx, &x));
3114   PetscFunctionReturn(0);
3115 }
3116 
3117 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) {
3118   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3119   IS                 iscol = a->col, isrow = a->row;
3120   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3121   const PetscInt    *rout, *cout, *r, *c;
3122   PetscScalar       *x, *tmp, sum;
3123   const PetscScalar *b;
3124   const MatScalar   *aa = a->a, *v;
3125 
3126   PetscFunctionBegin;
3127   if (!n) PetscFunctionReturn(0);
3128 
3129   PetscCall(VecGetArrayRead(bb, &b));
3130   PetscCall(VecGetArrayWrite(xx, &x));
3131   tmp = a->solve_work;
3132 
3133   PetscCall(ISGetIndices(isrow, &rout));
3134   r = rout;
3135   PetscCall(ISGetIndices(iscol, &cout));
3136   c = cout;
3137 
3138   /* forward solve the lower triangular */
3139   tmp[0] = b[r[0]];
3140   v      = aa;
3141   vi     = aj;
3142   for (i = 1; i < n; i++) {
3143     nz  = ai[i + 1] - ai[i];
3144     sum = b[r[i]];
3145     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3146     tmp[i] = sum;
3147     v += nz;
3148     vi += nz;
3149   }
3150 
3151   /* backward solve the upper triangular */
3152   for (i = n - 1; i >= 0; i--) {
3153     v   = aa + adiag[i + 1] + 1;
3154     vi  = aj + adiag[i + 1] + 1;
3155     nz  = adiag[i] - adiag[i + 1] - 1;
3156     sum = tmp[i];
3157     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3158     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3159   }
3160 
3161   PetscCall(ISRestoreIndices(isrow, &rout));
3162   PetscCall(ISRestoreIndices(iscol, &cout));
3163   PetscCall(VecRestoreArrayRead(bb, &b));
3164   PetscCall(VecRestoreArrayWrite(xx, &x));
3165   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 /*
3170     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3171 */
3172 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) {
3173   Mat             B = *fact;
3174   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3175   IS              isicol;
3176   const PetscInt *r, *ic;
3177   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3178   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3179   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3180   PetscInt        nlnk, *lnk;
3181   PetscBT         lnkbt;
3182   PetscBool       row_identity, icol_identity;
3183   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3184   const PetscInt *ics;
3185   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3186   PetscReal       dt = info->dt, shift = info->shiftamount;
3187   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3188   PetscBool       missing;
3189 
3190   PetscFunctionBegin;
3191   if (dt == PETSC_DEFAULT) dt = 0.005;
3192   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3193 
3194   /* ------- symbolic factorization, can be reused ---------*/
3195   PetscCall(MatMissingDiagonal(A, &missing, &i));
3196   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3197   adiag = a->diag;
3198 
3199   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3200 
3201   /* bdiag is location of diagonal in factor */
3202   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3203   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3204 
3205   /* allocate row pointers bi */
3206   PetscCall(PetscMalloc1(2 * n + 2, &bi));
3207 
3208   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3209   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3210   nnz_max = ai[n] + 2 * n * dtcount + 2;
3211 
3212   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3213   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3214 
3215   /* put together the new matrix */
3216   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3217   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
3218   b = (Mat_SeqAIJ *)B->data;
3219 
3220   b->free_a       = PETSC_TRUE;
3221   b->free_ij      = PETSC_TRUE;
3222   b->singlemalloc = PETSC_FALSE;
3223 
3224   b->a    = ba;
3225   b->j    = bj;
3226   b->i    = bi;
3227   b->diag = bdiag;
3228   b->ilen = NULL;
3229   b->imax = NULL;
3230   b->row  = isrow;
3231   b->col  = iscol;
3232   PetscCall(PetscObjectReference((PetscObject)isrow));
3233   PetscCall(PetscObjectReference((PetscObject)iscol));
3234   b->icol = isicol;
3235 
3236   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3237   PetscCall(PetscLogObjectMemory((PetscObject)B, nnz_max * (sizeof(PetscInt) + sizeof(MatScalar))));
3238   b->maxnz = nnz_max;
3239 
3240   B->factortype            = MAT_FACTOR_ILUDT;
3241   B->info.factor_mallocs   = 0;
3242   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3243   /* ------- end of symbolic factorization ---------*/
3244 
3245   PetscCall(ISGetIndices(isrow, &r));
3246   PetscCall(ISGetIndices(isicol, &ic));
3247   ics = ic;
3248 
3249   /* linked list for storing column indices of the active row */
3250   nlnk = n + 1;
3251   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3252 
3253   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3254   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3255   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3256   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3257   PetscCall(PetscArrayzero(rtmp, n));
3258 
3259   bi[0]         = 0;
3260   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3261   bdiag_rev[n]  = bdiag[0];
3262   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3263   for (i = 0; i < n; i++) {
3264     /* copy initial fill into linked list */
3265     nzi = ai[r[i] + 1] - ai[r[i]];
3266     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3267     nzi_al = adiag[r[i]] - ai[r[i]];
3268     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3269     ajtmp  = aj + ai[r[i]];
3270     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3271 
3272     /* load in initial (unfactored row) */
3273     aatmp = a->a + ai[r[i]];
3274     for (j = 0; j < nzi; j++) { rtmp[ics[*ajtmp++]] = *aatmp++; }
3275 
3276     /* add pivot rows into linked list */
3277     row = lnk[n];
3278     while (row < i) {
3279       nzi_bl = bi[row + 1] - bi[row] + 1;
3280       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3281       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3282       nzi += nlnk;
3283       row = lnk[row];
3284     }
3285 
3286     /* copy data from lnk into jtmp, then initialize lnk */
3287     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3288 
3289     /* numerical factorization */
3290     bjtmp = jtmp;
3291     row   = *bjtmp++; /* 1st pivot row */
3292     while (row < i) {
3293       pc         = rtmp + row;
3294       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3295       multiplier = (*pc) * (*pv);
3296       *pc        = multiplier;
3297       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3298         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3299         pv = ba + bdiag[row + 1] + 1;
3300         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3301         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3302         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3303       }
3304       row = *bjtmp++;
3305     }
3306 
3307     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3308     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3309     nzi_bl   = 0;
3310     j        = 0;
3311     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3312       vtmp[j]       = rtmp[jtmp[j]];
3313       rtmp[jtmp[j]] = 0.0;
3314       nzi_bl++;
3315       j++;
3316     }
3317     nzi_bu = nzi - nzi_bl - 1;
3318     while (j < nzi) {
3319       vtmp[j]       = rtmp[jtmp[j]];
3320       rtmp[jtmp[j]] = 0.0;
3321       j++;
3322     }
3323 
3324     bjtmp = bj + bi[i];
3325     batmp = ba + bi[i];
3326     /* apply level dropping rule to L part */
3327     ncut  = nzi_al + dtcount;
3328     if (ncut < nzi_bl) {
3329       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3330       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3331     } else {
3332       ncut = nzi_bl;
3333     }
3334     for (j = 0; j < ncut; j++) {
3335       bjtmp[j] = jtmp[j];
3336       batmp[j] = vtmp[j];
3337     }
3338     bi[i + 1] = bi[i] + ncut;
3339     nzi       = ncut + 1;
3340 
3341     /* apply level dropping rule to U part */
3342     ncut = nzi_au + dtcount;
3343     if (ncut < nzi_bu) {
3344       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3345       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3346     } else {
3347       ncut = nzi_bu;
3348     }
3349     nzi += ncut;
3350 
3351     /* mark bdiagonal */
3352     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3353     bdiag_rev[n - i - 1] = bdiag[i + 1];
3354     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3355     bjtmp                = bj + bdiag[i];
3356     batmp                = ba + bdiag[i];
3357     *bjtmp               = i;
3358     *batmp               = diag_tmp; /* rtmp[i]; */
3359     if (*batmp == 0.0) { *batmp = dt + shift; }
3360     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3361 
3362     bjtmp = bj + bdiag[i + 1] + 1;
3363     batmp = ba + bdiag[i + 1] + 1;
3364     for (k = 0; k < ncut; k++) {
3365       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3366       batmp[k] = vtmp[nzi_bl + 1 + k];
3367     }
3368 
3369     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3370   }              /* for (i=0; i<n; i++) */
3371   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3372 
3373   PetscCall(ISRestoreIndices(isrow, &r));
3374   PetscCall(ISRestoreIndices(isicol, &ic));
3375 
3376   PetscCall(PetscLLDestroy(lnk, lnkbt));
3377   PetscCall(PetscFree2(im, jtmp));
3378   PetscCall(PetscFree2(rtmp, vtmp));
3379   PetscCall(PetscFree(bdiag_rev));
3380 
3381   PetscCall(PetscLogFlops(B->cmap->n));
3382   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3383 
3384   PetscCall(ISIdentity(isrow, &row_identity));
3385   PetscCall(ISIdentity(isicol, &icol_identity));
3386   if (row_identity && icol_identity) {
3387     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3388   } else {
3389     B->ops->solve = MatSolve_SeqAIJ;
3390   }
3391 
3392   B->ops->solveadd          = NULL;
3393   B->ops->solvetranspose    = NULL;
3394   B->ops->solvetransposeadd = NULL;
3395   B->ops->matsolve          = NULL;
3396   B->assembled              = PETSC_TRUE;
3397   B->preallocated           = PETSC_TRUE;
3398   PetscFunctionReturn(0);
3399 }
3400 
3401 /* a wraper of MatILUDTFactor_SeqAIJ() */
3402 /*
3403     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3404 */
3405 
3406 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) {
3407   PetscFunctionBegin;
3408   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 /*
3413    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3414    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3415 */
3416 /*
3417     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3418 */
3419 
3420 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) {
3421   Mat             C = fact;
3422   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3423   IS              isrow = b->row, isicol = b->icol;
3424   const PetscInt *r, *ic, *ics;
3425   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3426   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3427   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3428   PetscReal       dt = info->dt, shift = info->shiftamount;
3429   PetscBool       row_identity, col_identity;
3430 
3431   PetscFunctionBegin;
3432   PetscCall(ISGetIndices(isrow, &r));
3433   PetscCall(ISGetIndices(isicol, &ic));
3434   PetscCall(PetscMalloc1(n + 1, &rtmp));
3435   ics = ic;
3436 
3437   for (i = 0; i < n; i++) {
3438     /* initialize rtmp array */
3439     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3440     bjtmp = bj + bi[i];
3441     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3442     rtmp[i] = 0.0;
3443     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3444     bjtmp   = bj + bdiag[i + 1] + 1;
3445     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3446 
3447     /* load in initial unfactored row of A */
3448     nz    = ai[r[i] + 1] - ai[r[i]];
3449     ajtmp = aj + ai[r[i]];
3450     v     = aa + ai[r[i]];
3451     for (j = 0; j < nz; j++) { rtmp[ics[*ajtmp++]] = v[j]; }
3452 
3453     /* numerical factorization */
3454     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3455     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3456     k     = 0;
3457     while (k < nzl) {
3458       row        = *bjtmp++;
3459       pc         = rtmp + row;
3460       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3461       multiplier = (*pc) * (*pv);
3462       *pc        = multiplier;
3463       if (PetscAbsScalar(multiplier) > dt) {
3464         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3465         pv = b->a + bdiag[row + 1] + 1;
3466         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3467         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3468         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3469       }
3470       k++;
3471     }
3472 
3473     /* finished row so stick it into b->a */
3474     /* L-part */
3475     pv  = b->a + bi[i];
3476     pj  = bj + bi[i];
3477     nzl = bi[i + 1] - bi[i];
3478     for (j = 0; j < nzl; j++) { pv[j] = rtmp[pj[j]]; }
3479 
3480     /* diagonal: invert diagonal entries for simpler triangular solves */
3481     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3482     b->a[bdiag[i]] = 1.0 / rtmp[i];
3483 
3484     /* U-part */
3485     pv  = b->a + bdiag[i + 1] + 1;
3486     pj  = bj + bdiag[i + 1] + 1;
3487     nzu = bdiag[i] - bdiag[i + 1] - 1;
3488     for (j = 0; j < nzu; j++) { pv[j] = rtmp[pj[j]]; }
3489   }
3490 
3491   PetscCall(PetscFree(rtmp));
3492   PetscCall(ISRestoreIndices(isicol, &ic));
3493   PetscCall(ISRestoreIndices(isrow, &r));
3494 
3495   PetscCall(ISIdentity(isrow, &row_identity));
3496   PetscCall(ISIdentity(isicol, &col_identity));
3497   if (row_identity && col_identity) {
3498     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3499   } else {
3500     C->ops->solve = MatSolve_SeqAIJ;
3501   }
3502   C->ops->solveadd          = NULL;
3503   C->ops->solvetranspose    = NULL;
3504   C->ops->solvetransposeadd = NULL;
3505   C->ops->matsolve          = NULL;
3506   C->assembled              = PETSC_TRUE;
3507   C->preallocated           = PETSC_TRUE;
3508 
3509   PetscCall(PetscLogFlops(C->cmap->n));
3510   PetscFunctionReturn(0);
3511 }
3512