xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 8d47944aebcd8a3a8a4a1aebd4b3f440b4d70fab)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
6 /*
7       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8 
9       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10 */
11 #if 0
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal          best, past = 0, future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i = 0; i < n; i++) {
25     future = 0.0;
26     for (j = ai[i]; j < ai[i + 1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else past = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past / future > best) {
32       best    = past / future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n, &done));
38   PetscCall(PetscArrayzero(done, n));
39   PetscCall(PetscMalloc1(n, &order));
40   order[0] = current;
41   for (i = 0; i < n - 1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j = ai[current]; j < ai[current + 1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k = ai[jj]; k < ai[jj + 1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past / future > best) {
57         best       = past / future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k = 0; k < n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j = ai[k]; j < ai[k + 1]; j++) {
68           kk = aj[j];
69           if (done[kk]) past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past / future > best) {
74           best       = past / future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80     current      = newcurrent;
81     order[i + 1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #endif
91 
92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93 {
94   PetscFunctionBegin;
95   *type = MATSOLVERPETSC;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100 {
101   PetscInt n = A->rmap->n;
102 
103   PetscFunctionBegin;
104 #if defined(PETSC_USE_COMPLEX)
105   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
106 #endif
107   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
108   PetscCall(MatSetSizes(*B, n, n, n, n));
109   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
110     PetscCall(MatSetType(*B, MATSEQAIJ));
111 
112     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
113     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
114 
115     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
116     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
117     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
118     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
119   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
120     PetscCall(MatSetType(*B, MATSEQSBAIJ));
121     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
122 
123     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
124     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
125     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
126     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
127   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
128   (*B)->factortype = ftype;
129 
130   PetscCall(PetscFree((*B)->solvertype));
131   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
132   (*B)->canuseordering = PETSC_TRUE;
133   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 #if 0
138 // currently unused
139 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
140 {
141   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
142   IS                 isicol;
143   const PetscInt    *r, *ic;
144   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
145   PetscInt          *bi, *bj, *ajtmp;
146   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
147   PetscReal          f;
148   PetscInt           nlnk, *lnk, k, **bi_ptr;
149   PetscFreeSpaceList free_space = NULL, current_space = NULL;
150   PetscBT            lnkbt;
151   PetscBool          missing;
152 
153   PetscFunctionBegin;
154   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
155   PetscCall(MatMissingDiagonal(A, &missing, &i));
156   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
157 
158   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
159   PetscCall(ISGetIndices(isrow, &r));
160   PetscCall(ISGetIndices(isicol, &ic));
161 
162   /* get new row pointers */
163   PetscCall(PetscMalloc1(n + 1, &bi));
164   bi[0] = 0;
165 
166   /* bdiag is location of diagonal in factor */
167   PetscCall(PetscMalloc1(n + 1, &bdiag));
168   bdiag[0] = 0;
169 
170   /* linked list for storing column indices of the active row */
171   nlnk = n + 1;
172   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
173 
174   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
175 
176   /* initial FreeSpace size is f*(ai[n]+1) */
177   f = info->fill;
178   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
179   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
180   current_space = free_space;
181 
182   for (i = 0; i < n; i++) {
183     /* copy previous fill into linked list */
184     nzi   = 0;
185     nnz   = ai[r[i] + 1] - ai[r[i]];
186     ajtmp = aj + ai[r[i]];
187     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
188     nzi += nlnk;
189 
190     /* add pivot rows into linked list */
191     row = lnk[n];
192     while (row < i) {
193       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
194       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
195       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
196       nzi += nlnk;
197       row = lnk[row];
198     }
199     bi[i + 1] = bi[i] + nzi;
200     im[i]     = nzi;
201 
202     /* mark bdiag */
203     nzbd = 0;
204     nnz  = nzi;
205     k    = lnk[n];
206     while (nnz-- && k < i) {
207       nzbd++;
208       k = lnk[k];
209     }
210     bdiag[i] = bi[i] + nzbd;
211 
212     /* if free space is not available, make more free space */
213     if (current_space->local_remaining < nzi) {
214       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
215       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
216       reallocs++;
217     }
218 
219     /* copy data into free space, then initialize lnk */
220     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
221 
222     bi_ptr[i] = current_space->array;
223     current_space->array += nzi;
224     current_space->local_used += nzi;
225     current_space->local_remaining -= nzi;
226   }
227   #if defined(PETSC_USE_INFO)
228   if (ai[n] != 0) {
229     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
230     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
231     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
232     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
233     PetscCall(PetscInfo(A, "for best performance.\n"));
234   } else {
235     PetscCall(PetscInfo(A, "Empty matrix\n"));
236   }
237   #endif
238 
239   PetscCall(ISRestoreIndices(isrow, &r));
240   PetscCall(ISRestoreIndices(isicol, &ic));
241 
242   /* destroy list of free space and other temporary array(s) */
243   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
244   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
245   PetscCall(PetscLLDestroy(lnk, lnkbt));
246   PetscCall(PetscFree2(bi_ptr, im));
247 
248   /* put together the new matrix */
249   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
250   b = (Mat_SeqAIJ *)(B)->data;
251 
252   b->free_a       = PETSC_TRUE;
253   b->free_ij      = PETSC_TRUE;
254   b->singlemalloc = PETSC_FALSE;
255 
256   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
257   b->j    = bj;
258   b->i    = bi;
259   b->diag = bdiag;
260   b->ilen = NULL;
261   b->imax = NULL;
262   b->row  = isrow;
263   b->col  = iscol;
264   PetscCall(PetscObjectReference((PetscObject)isrow));
265   PetscCall(PetscObjectReference((PetscObject)iscol));
266   b->icol = isicol;
267   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
268 
269   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
270   b->maxnz = b->nz = bi[n];
271 
272   (B)->factortype            = MAT_FACTOR_LU;
273   (B)->info.factor_mallocs   = reallocs;
274   (B)->info.fill_ratio_given = f;
275 
276   if (ai[n]) {
277     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
278   } else {
279     (B)->info.fill_ratio_needed = 0.0;
280   }
281   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
282   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
283   PetscFunctionReturn(PETSC_SUCCESS);
284 }
285 #endif
286 
287 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
288 {
289   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
290   IS                 isicol;
291   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
292   PetscInt           i, n = A->rmap->n;
293   PetscInt          *bi, *bj;
294   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
295   PetscReal          f;
296   PetscInt           nlnk, *lnk, k, **bi_ptr;
297   PetscFreeSpaceList free_space = NULL, current_space = NULL;
298   PetscBT            lnkbt;
299   PetscBool          missing;
300 
301   PetscFunctionBegin;
302   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
303   PetscCall(MatMissingDiagonal(A, &missing, &i));
304   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
305 
306   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
307   PetscCall(ISGetIndices(isrow, &r));
308   PetscCall(ISGetIndices(isicol, &ic));
309 
310   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
311   PetscCall(PetscMalloc1(n + 1, &bi));
312   PetscCall(PetscMalloc1(n + 1, &bdiag));
313   bi[0] = bdiag[0] = 0;
314 
315   /* linked list for storing column indices of the active row */
316   nlnk = n + 1;
317   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
318 
319   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
320 
321   /* initial FreeSpace size is f*(ai[n]+1) */
322   f = info->fill;
323   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
324   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
325   current_space = free_space;
326 
327   for (i = 0; i < n; i++) {
328     /* copy previous fill into linked list */
329     nzi   = 0;
330     nnz   = ai[r[i] + 1] - ai[r[i]];
331     ajtmp = aj + ai[r[i]];
332     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
333     nzi += nlnk;
334 
335     /* add pivot rows into linked list */
336     row = lnk[n];
337     while (row < i) {
338       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
339       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
340       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
341       nzi += nlnk;
342       row = lnk[row];
343     }
344     bi[i + 1] = bi[i] + nzi;
345     im[i]     = nzi;
346 
347     /* mark bdiag */
348     nzbd = 0;
349     nnz  = nzi;
350     k    = lnk[n];
351     while (nnz-- && k < i) {
352       nzbd++;
353       k = lnk[k];
354     }
355     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
356 
357     /* if free space is not available, make more free space */
358     if (current_space->local_remaining < nzi) {
359       /* estimated additional space needed */
360       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
361       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
362       reallocs++;
363     }
364 
365     /* copy data into free space, then initialize lnk */
366     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
367 
368     bi_ptr[i] = current_space->array;
369     current_space->array += nzi;
370     current_space->local_used += nzi;
371     current_space->local_remaining -= nzi;
372   }
373 
374   PetscCall(ISRestoreIndices(isrow, &r));
375   PetscCall(ISRestoreIndices(isicol, &ic));
376 
377   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
378   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
379   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
380   PetscCall(PetscLLDestroy(lnk, lnkbt));
381   PetscCall(PetscFree2(bi_ptr, im));
382 
383   /* put together the new matrix */
384   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
385   b = (Mat_SeqAIJ *)(B)->data;
386 
387   b->free_a       = PETSC_TRUE;
388   b->free_ij      = PETSC_TRUE;
389   b->singlemalloc = PETSC_FALSE;
390 
391   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
392 
393   b->j    = bj;
394   b->i    = bi;
395   b->diag = bdiag;
396   b->ilen = NULL;
397   b->imax = NULL;
398   b->row  = isrow;
399   b->col  = iscol;
400   PetscCall(PetscObjectReference((PetscObject)isrow));
401   PetscCall(PetscObjectReference((PetscObject)iscol));
402   b->icol = isicol;
403   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
404 
405   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
406   b->maxnz = b->nz = bdiag[0] + 1;
407 
408   B->factortype            = MAT_FACTOR_LU;
409   B->info.factor_mallocs   = reallocs;
410   B->info.fill_ratio_given = f;
411 
412   if (ai[n]) {
413     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
414   } else {
415     B->info.fill_ratio_needed = 0.0;
416   }
417 #if defined(PETSC_USE_INFO)
418   if (ai[n] != 0) {
419     PetscReal af = B->info.fill_ratio_needed;
420     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
421     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
422     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
423     PetscCall(PetscInfo(A, "for best performance.\n"));
424   } else {
425     PetscCall(PetscInfo(A, "Empty matrix\n"));
426   }
427 #endif
428   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
429   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
430   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
434 /*
435     Trouble in factorization, should we dump the original matrix?
436 */
437 PetscErrorCode MatFactorDumpMatrix(Mat A)
438 {
439   PetscBool flg = PETSC_FALSE;
440 
441   PetscFunctionBegin;
442   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
443   if (flg) {
444     PetscViewer viewer;
445     char        filename[PETSC_MAX_PATH_LEN];
446 
447     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
448     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
449     PetscCall(MatView(A, viewer));
450     PetscCall(PetscViewerDestroy(&viewer));
451   }
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
455 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
456 {
457   Mat              C = B;
458   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
459   IS               isrow = b->row, isicol = b->icol;
460   const PetscInt  *r, *ic, *ics;
461   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
462   PetscInt         i, j, k, nz, nzL, row, *pj;
463   const PetscInt  *ajtmp, *bjtmp;
464   MatScalar       *rtmp, *pc, multiplier, *pv;
465   const MatScalar *aa = a->a, *v;
466   PetscBool        row_identity, col_identity;
467   FactorShiftCtx   sctx;
468   const PetscInt  *ddiag;
469   PetscReal        rs;
470   MatScalar        d;
471 
472   PetscFunctionBegin;
473   /* MatPivotSetUp(): initialize shift context sctx */
474   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
475 
476   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
477     ddiag          = a->diag;
478     sctx.shift_top = info->zeropivot;
479     for (i = 0; i < n; i++) {
480       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
481       d  = (aa)[ddiag[i]];
482       rs = -PetscAbsScalar(d) - PetscRealPart(d);
483       v  = aa + ai[i];
484       nz = ai[i + 1] - ai[i];
485       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
486       if (rs > sctx.shift_top) sctx.shift_top = rs;
487     }
488     sctx.shift_top *= 1.1;
489     sctx.nshift_max = 5;
490     sctx.shift_lo   = 0.;
491     sctx.shift_hi   = 1.;
492   }
493 
494   PetscCall(ISGetIndices(isrow, &r));
495   PetscCall(ISGetIndices(isicol, &ic));
496   PetscCall(PetscMalloc1(n + 1, &rtmp));
497   ics = ic;
498 
499   do {
500     sctx.newshift = PETSC_FALSE;
501     for (i = 0; i < n; i++) {
502       /* zero rtmp */
503       /* L part */
504       nz    = bi[i + 1] - bi[i];
505       bjtmp = bj + bi[i];
506       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
507 
508       /* U part */
509       nz    = bdiag[i] - bdiag[i + 1];
510       bjtmp = bj + bdiag[i + 1] + 1;
511       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* load in initial (unfactored row) */
514       nz    = ai[r[i] + 1] - ai[r[i]];
515       ajtmp = aj + ai[r[i]];
516       v     = aa + ai[r[i]];
517       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
518       /* ZeropivotApply() */
519       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
520 
521       /* elimination */
522       bjtmp = bj + bi[i];
523       row   = *bjtmp++;
524       nzL   = bi[i + 1] - bi[i];
525       for (k = 0; k < nzL; k++) {
526         pc = rtmp + row;
527         if (*pc != 0.0) {
528           pv         = b->a + bdiag[row];
529           multiplier = *pc * (*pv);
530           *pc        = multiplier;
531 
532           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
533           pv = b->a + bdiag[row + 1] + 1;
534           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
535 
536           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
537           PetscCall(PetscLogFlops(1 + 2.0 * nz));
538         }
539         row = *bjtmp++;
540       }
541 
542       /* finished row so stick it into b->a */
543       rs = 0.0;
544       /* L part */
545       pv = b->a + bi[i];
546       pj = b->j + bi[i];
547       nz = bi[i + 1] - bi[i];
548       for (j = 0; j < nz; j++) {
549         pv[j] = rtmp[pj[j]];
550         rs += PetscAbsScalar(pv[j]);
551       }
552 
553       /* U part */
554       pv = b->a + bdiag[i + 1] + 1;
555       pj = b->j + bdiag[i + 1] + 1;
556       nz = bdiag[i] - bdiag[i + 1] - 1;
557       for (j = 0; j < nz; j++) {
558         pv[j] = rtmp[pj[j]];
559         rs += PetscAbsScalar(pv[j]);
560       }
561 
562       sctx.rs = rs;
563       sctx.pv = rtmp[i];
564       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
565       if (sctx.newshift) break; /* break for-loop */
566       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
567 
568       /* Mark diagonal and invert diagonal for simpler triangular solves */
569       pv  = b->a + bdiag[i];
570       *pv = 1.0 / rtmp[i];
571 
572     } /* endof for (i=0; i<n; i++) { */
573 
574     /* MatPivotRefine() */
575     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
576       /*
577        * if no shift in this attempt & shifting & started shifting & can refine,
578        * then try lower shift
579        */
580       sctx.shift_hi       = sctx.shift_fraction;
581       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
582       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
583       sctx.newshift       = PETSC_TRUE;
584       sctx.nshift++;
585     }
586   } while (sctx.newshift);
587 
588   PetscCall(PetscFree(rtmp));
589   PetscCall(ISRestoreIndices(isicol, &ic));
590   PetscCall(ISRestoreIndices(isrow, &r));
591 
592   PetscCall(ISIdentity(isrow, &row_identity));
593   PetscCall(ISIdentity(isicol, &col_identity));
594   if (b->inode.size) {
595     C->ops->solve = MatSolve_SeqAIJ_Inode;
596   } else if (row_identity && col_identity) {
597     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
598   } else {
599     C->ops->solve = MatSolve_SeqAIJ;
600   }
601   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
602   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
603   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
604   C->ops->matsolve          = MatMatSolve_SeqAIJ;
605   C->assembled              = PETSC_TRUE;
606   C->preallocated           = PETSC_TRUE;
607 
608   PetscCall(PetscLogFlops(C->cmap->n));
609 
610   /* MatShiftView(A,info,&sctx) */
611   if (sctx.nshift) {
612     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
616     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
617       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
618     }
619   }
620   PetscFunctionReturn(PETSC_SUCCESS);
621 }
622 
623 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
624 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
625 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
626 
627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
628 {
629   Mat              C = B;
630   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
631   IS               isrow = b->row, isicol = b->icol;
632   const PetscInt  *r, *ic, *ics;
633   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
634   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
635   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
636   MatScalar       *pv, *rtmp, *pc, multiplier, d;
637   const MatScalar *v, *aa = a->a;
638   PetscReal        rs = 0.0;
639   FactorShiftCtx   sctx;
640   const PetscInt  *ddiag;
641   PetscBool        row_identity, col_identity;
642 
643   PetscFunctionBegin;
644   /* MatPivotSetUp(): initialize shift context sctx */
645   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
646 
647   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648     ddiag          = a->diag;
649     sctx.shift_top = info->zeropivot;
650     for (i = 0; i < n; i++) {
651       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652       d  = (aa)[ddiag[i]];
653       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654       v  = aa + ai[i];
655       nz = ai[i + 1] - ai[i];
656       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
657       if (rs > sctx.shift_top) sctx.shift_top = rs;
658     }
659     sctx.shift_top *= 1.1;
660     sctx.nshift_max = 5;
661     sctx.shift_lo   = 0.;
662     sctx.shift_hi   = 1.;
663   }
664 
665   PetscCall(ISGetIndices(isrow, &r));
666   PetscCall(ISGetIndices(isicol, &ic));
667   PetscCall(PetscMalloc1(n + 1, &rtmp));
668   ics = ic;
669 
670   do {
671     sctx.newshift = PETSC_FALSE;
672     for (i = 0; i < n; i++) {
673       nz    = bi[i + 1] - bi[i];
674       bjtmp = bj + bi[i];
675       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
676 
677       /* load in initial (unfactored row) */
678       nz    = ai[r[i] + 1] - ai[r[i]];
679       ajtmp = aj + ai[r[i]];
680       v     = aa + ai[r[i]];
681       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
682       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
683 
684       row = *bjtmp++;
685       while (row < i) {
686         pc = rtmp + row;
687         if (*pc != 0.0) {
688           pv         = b->a + diag_offset[row];
689           pj         = b->j + diag_offset[row] + 1;
690           multiplier = *pc / *pv++;
691           *pc        = multiplier;
692           nz         = bi[row + 1] - diag_offset[row] - 1;
693           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
694           PetscCall(PetscLogFlops(1 + 2.0 * nz));
695         }
696         row = *bjtmp++;
697       }
698       /* finished row so stick it into b->a */
699       pv   = b->a + bi[i];
700       pj   = b->j + bi[i];
701       nz   = bi[i + 1] - bi[i];
702       diag = diag_offset[i] - bi[i];
703       rs   = 0.0;
704       for (j = 0; j < nz; j++) {
705         pv[j] = rtmp[pj[j]];
706         rs += PetscAbsScalar(pv[j]);
707       }
708       rs -= PetscAbsScalar(pv[diag]);
709 
710       sctx.rs = rs;
711       sctx.pv = pv[diag];
712       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
713       if (sctx.newshift) break;
714       pv[diag] = sctx.pv;
715     }
716 
717     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
718       /*
719        * if no shift in this attempt & shifting & started shifting & can refine,
720        * then try lower shift
721        */
722       sctx.shift_hi       = sctx.shift_fraction;
723       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
724       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
725       sctx.newshift       = PETSC_TRUE;
726       sctx.nshift++;
727     }
728   } while (sctx.newshift);
729 
730   /* invert diagonal entries for simpler triangular solves */
731   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
732   PetscCall(PetscFree(rtmp));
733   PetscCall(ISRestoreIndices(isicol, &ic));
734   PetscCall(ISRestoreIndices(isrow, &r));
735 
736   PetscCall(ISIdentity(isrow, &row_identity));
737   PetscCall(ISIdentity(isicol, &col_identity));
738   if (row_identity && col_identity) {
739     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
740   } else {
741     C->ops->solve = MatSolve_SeqAIJ_inplace;
742   }
743   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
744   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
745   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
746   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
747 
748   C->assembled    = PETSC_TRUE;
749   C->preallocated = PETSC_TRUE;
750 
751   PetscCall(PetscLogFlops(C->cmap->n));
752   if (sctx.nshift) {
753     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
754       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
755     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
756       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
757     }
758   }
759   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
760   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
761 
762   PetscCall(MatSeqAIJCheckInode(C));
763   PetscFunctionReturn(PETSC_SUCCESS);
764 }
765 
766 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
767 
768 /*
769    This routine implements inplace ILU(0) with row or/and column permutations.
770    Input:
771      A - original matrix
772    Output;
773      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774          a->j (col index) is permuted by the inverse of colperm, then sorted
775          a->a reordered accordingly with a->j
776          a->diag (ptr to diagonal elements) is updated.
777 */
778 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
779 {
780   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
781   IS               isrow = a->row, isicol = a->icol;
782   const PetscInt  *r, *ic, *ics;
783   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
784   PetscInt        *ajtmp, nz, row;
785   PetscInt        *diag = a->diag, nbdiag, *pj;
786   PetscScalar     *rtmp, *pc, multiplier, d;
787   MatScalar       *pv, *v;
788   PetscReal        rs;
789   FactorShiftCtx   sctx;
790   const MatScalar *aa = a->a, *vtmp;
791 
792   PetscFunctionBegin;
793   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
794 
795   /* MatPivotSetUp(): initialize shift context sctx */
796   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
797 
798   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
799     const PetscInt *ddiag = a->diag;
800     sctx.shift_top        = info->zeropivot;
801     for (i = 0; i < n; i++) {
802       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
803       d    = (aa)[ddiag[i]];
804       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
805       vtmp = aa + ai[i];
806       nz   = ai[i + 1] - ai[i];
807       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
808       if (rs > sctx.shift_top) sctx.shift_top = rs;
809     }
810     sctx.shift_top *= 1.1;
811     sctx.nshift_max = 5;
812     sctx.shift_lo   = 0.;
813     sctx.shift_hi   = 1.;
814   }
815 
816   PetscCall(ISGetIndices(isrow, &r));
817   PetscCall(ISGetIndices(isicol, &ic));
818   PetscCall(PetscMalloc1(n + 1, &rtmp));
819   PetscCall(PetscArrayzero(rtmp, n + 1));
820   ics = ic;
821 
822 #if defined(MV)
823   sctx.shift_top      = 0.;
824   sctx.nshift_max     = 0;
825   sctx.shift_lo       = 0.;
826   sctx.shift_hi       = 0.;
827   sctx.shift_fraction = 0.;
828 
829   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
830     sctx.shift_top = 0.;
831     for (i = 0; i < n; i++) {
832       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
833       d  = (a->a)[diag[i]];
834       rs = -PetscAbsScalar(d) - PetscRealPart(d);
835       v  = a->a + ai[i];
836       nz = ai[i + 1] - ai[i];
837       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
838       if (rs > sctx.shift_top) sctx.shift_top = rs;
839     }
840     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
841     sctx.shift_top *= 1.1;
842     sctx.nshift_max = 5;
843     sctx.shift_lo   = 0.;
844     sctx.shift_hi   = 1.;
845   }
846 
847   sctx.shift_amount = 0.;
848   sctx.nshift       = 0;
849 #endif
850 
851   do {
852     sctx.newshift = PETSC_FALSE;
853     for (i = 0; i < n; i++) {
854       /* load in initial unfactored row */
855       nz    = ai[r[i] + 1] - ai[r[i]];
856       ajtmp = aj + ai[r[i]];
857       v     = a->a + ai[r[i]];
858       /* sort permuted ajtmp and values v accordingly */
859       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
860       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
861 
862       diag[r[i]] = ai[r[i]];
863       for (j = 0; j < nz; j++) {
864         rtmp[ajtmp[j]] = v[j];
865         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
866       }
867       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
868 
869       row = *ajtmp++;
870       while (row < i) {
871         pc = rtmp + row;
872         if (*pc != 0.0) {
873           pv = a->a + diag[r[row]];
874           pj = aj + diag[r[row]] + 1;
875 
876           multiplier = *pc / *pv++;
877           *pc        = multiplier;
878           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
879           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
880           PetscCall(PetscLogFlops(1 + 2.0 * nz));
881         }
882         row = *ajtmp++;
883       }
884       /* finished row so overwrite it onto a->a */
885       pv     = a->a + ai[r[i]];
886       pj     = aj + ai[r[i]];
887       nz     = ai[r[i] + 1] - ai[r[i]];
888       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
889 
890       rs = 0.0;
891       for (j = 0; j < nz; j++) {
892         pv[j] = rtmp[pj[j]];
893         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
894       }
895 
896       sctx.rs = rs;
897       sctx.pv = pv[nbdiag];
898       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
899       if (sctx.newshift) break;
900       pv[nbdiag] = sctx.pv;
901     }
902 
903     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
904       /*
905        * if no shift in this attempt & shifting & started shifting & can refine,
906        * then try lower shift
907        */
908       sctx.shift_hi       = sctx.shift_fraction;
909       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
910       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
911       sctx.newshift       = PETSC_TRUE;
912       sctx.nshift++;
913     }
914   } while (sctx.newshift);
915 
916   /* invert diagonal entries for simpler triangular solves */
917   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
918 
919   PetscCall(PetscFree(rtmp));
920   PetscCall(ISRestoreIndices(isicol, &ic));
921   PetscCall(ISRestoreIndices(isrow, &r));
922 
923   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
924   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
925   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
926   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
927 
928   A->assembled    = PETSC_TRUE;
929   A->preallocated = PETSC_TRUE;
930 
931   PetscCall(PetscLogFlops(A->cmap->n));
932   if (sctx.nshift) {
933     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
934       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
935     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
936       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
937     }
938   }
939   PetscFunctionReturn(PETSC_SUCCESS);
940 }
941 
942 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
943 {
944   Mat C;
945 
946   PetscFunctionBegin;
947   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
948   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
949   PetscCall(MatLUFactorNumeric(C, A, info));
950 
951   A->ops->solve          = C->ops->solve;
952   A->ops->solvetranspose = C->ops->solvetranspose;
953 
954   PetscCall(MatHeaderMerge(A, &C));
955   PetscFunctionReturn(PETSC_SUCCESS);
956 }
957 
958 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
959 {
960   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
961   IS                 iscol = a->col, isrow = a->row;
962   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
963   PetscInt           nz;
964   const PetscInt    *rout, *cout, *r, *c;
965   PetscScalar       *x, *tmp, *tmps, sum;
966   const PetscScalar *b;
967   const MatScalar   *aa = a->a, *v;
968 
969   PetscFunctionBegin;
970   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
971 
972   PetscCall(VecGetArrayRead(bb, &b));
973   PetscCall(VecGetArrayWrite(xx, &x));
974   tmp = a->solve_work;
975 
976   PetscCall(ISGetIndices(isrow, &rout));
977   r = rout;
978   PetscCall(ISGetIndices(iscol, &cout));
979   c = cout + (n - 1);
980 
981   /* forward solve the lower triangular */
982   tmp[0] = b[*r++];
983   tmps   = tmp;
984   for (i = 1; i < n; i++) {
985     v   = aa + ai[i];
986     vi  = aj + ai[i];
987     nz  = a->diag[i] - ai[i];
988     sum = b[*r++];
989     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
990     tmp[i] = sum;
991   }
992 
993   /* backward solve the upper triangular */
994   for (i = n - 1; i >= 0; i--) {
995     v   = aa + a->diag[i] + 1;
996     vi  = aj + a->diag[i] + 1;
997     nz  = ai[i + 1] - a->diag[i] - 1;
998     sum = tmp[i];
999     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1000     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1001   }
1002 
1003   PetscCall(ISRestoreIndices(isrow, &rout));
1004   PetscCall(ISRestoreIndices(iscol, &cout));
1005   PetscCall(VecRestoreArrayRead(bb, &b));
1006   PetscCall(VecRestoreArrayWrite(xx, &x));
1007   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1008   PetscFunctionReturn(PETSC_SUCCESS);
1009 }
1010 
1011 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1012 {
1013   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1014   IS                 iscol = a->col, isrow = a->row;
1015   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1016   PetscInt           nz, neq, ldb, ldx;
1017   const PetscInt    *rout, *cout, *r, *c;
1018   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1019   const PetscScalar *b, *aa  = a->a, *v;
1020   PetscBool          isdense;
1021 
1022   PetscFunctionBegin;
1023   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1024   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1025   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1026   if (X != B) {
1027     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1028     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1029   }
1030   PetscCall(MatDenseGetArrayRead(B, &b));
1031   PetscCall(MatDenseGetLDA(B, &ldb));
1032   PetscCall(MatDenseGetArray(X, &x));
1033   PetscCall(MatDenseGetLDA(X, &ldx));
1034   PetscCall(ISGetIndices(isrow, &rout));
1035   r = rout;
1036   PetscCall(ISGetIndices(iscol, &cout));
1037   c = cout;
1038   for (neq = 0; neq < B->cmap->n; neq++) {
1039     /* forward solve the lower triangular */
1040     tmp[0] = b[r[0]];
1041     tmps   = tmp;
1042     for (i = 1; i < n; i++) {
1043       v   = aa + ai[i];
1044       vi  = aj + ai[i];
1045       nz  = a->diag[i] - ai[i];
1046       sum = b[r[i]];
1047       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1048       tmp[i] = sum;
1049     }
1050     /* backward solve the upper triangular */
1051     for (i = n - 1; i >= 0; i--) {
1052       v   = aa + a->diag[i] + 1;
1053       vi  = aj + a->diag[i] + 1;
1054       nz  = ai[i + 1] - a->diag[i] - 1;
1055       sum = tmp[i];
1056       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1057       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1058     }
1059     b += ldb;
1060     x += ldx;
1061   }
1062   PetscCall(ISRestoreIndices(isrow, &rout));
1063   PetscCall(ISRestoreIndices(iscol, &cout));
1064   PetscCall(MatDenseRestoreArrayRead(B, &b));
1065   PetscCall(MatDenseRestoreArray(X, &x));
1066   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1071 {
1072   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1073   IS                 iscol = a->col, isrow = a->row;
1074   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1075   PetscInt           nz, neq, ldb, ldx;
1076   const PetscInt    *rout, *cout, *r, *c;
1077   PetscScalar       *x, *tmp = a->solve_work, sum;
1078   const PetscScalar *b, *aa  = a->a, *v;
1079   PetscBool          isdense;
1080 
1081   PetscFunctionBegin;
1082   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1083   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1084   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1085   if (X != B) {
1086     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1087     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1088   }
1089   PetscCall(MatDenseGetArrayRead(B, &b));
1090   PetscCall(MatDenseGetLDA(B, &ldb));
1091   PetscCall(MatDenseGetArray(X, &x));
1092   PetscCall(MatDenseGetLDA(X, &ldx));
1093   PetscCall(ISGetIndices(isrow, &rout));
1094   r = rout;
1095   PetscCall(ISGetIndices(iscol, &cout));
1096   c = cout;
1097   for (neq = 0; neq < B->cmap->n; neq++) {
1098     /* forward solve the lower triangular */
1099     tmp[0] = b[r[0]];
1100     v      = aa;
1101     vi     = aj;
1102     for (i = 1; i < n; i++) {
1103       nz  = ai[i + 1] - ai[i];
1104       sum = b[r[i]];
1105       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1106       tmp[i] = sum;
1107       v += nz;
1108       vi += nz;
1109     }
1110     /* backward solve the upper triangular */
1111     for (i = n - 1; i >= 0; i--) {
1112       v   = aa + adiag[i + 1] + 1;
1113       vi  = aj + adiag[i + 1] + 1;
1114       nz  = adiag[i] - adiag[i + 1] - 1;
1115       sum = tmp[i];
1116       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1117       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1118     }
1119     b += ldb;
1120     x += ldx;
1121   }
1122   PetscCall(ISRestoreIndices(isrow, &rout));
1123   PetscCall(ISRestoreIndices(iscol, &cout));
1124   PetscCall(MatDenseRestoreArrayRead(B, &b));
1125   PetscCall(MatDenseRestoreArray(X, &x));
1126   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1131 {
1132   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1133   IS                 iscol = a->col, isrow = a->row;
1134   const PetscInt    *r, *c, *rout, *cout;
1135   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1136   PetscInt           nz, row;
1137   PetscScalar       *x, *tmp, *tmps, sum;
1138   const PetscScalar *b;
1139   const MatScalar   *aa = a->a, *v;
1140 
1141   PetscFunctionBegin;
1142   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1143 
1144   PetscCall(VecGetArrayRead(bb, &b));
1145   PetscCall(VecGetArrayWrite(xx, &x));
1146   tmp = a->solve_work;
1147 
1148   PetscCall(ISGetIndices(isrow, &rout));
1149   r = rout;
1150   PetscCall(ISGetIndices(iscol, &cout));
1151   c = cout + (n - 1);
1152 
1153   /* forward solve the lower triangular */
1154   tmp[0] = b[*r++];
1155   tmps   = tmp;
1156   for (row = 1; row < n; row++) {
1157     i   = rout[row]; /* permuted row */
1158     v   = aa + ai[i];
1159     vi  = aj + ai[i];
1160     nz  = a->diag[i] - ai[i];
1161     sum = b[*r++];
1162     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1163     tmp[row] = sum;
1164   }
1165 
1166   /* backward solve the upper triangular */
1167   for (row = n - 1; row >= 0; row--) {
1168     i   = rout[row]; /* permuted row */
1169     v   = aa + a->diag[i] + 1;
1170     vi  = aj + a->diag[i] + 1;
1171     nz  = ai[i + 1] - a->diag[i] - 1;
1172     sum = tmp[row];
1173     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1174     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1175   }
1176 
1177   PetscCall(ISRestoreIndices(isrow, &rout));
1178   PetscCall(ISRestoreIndices(iscol, &cout));
1179   PetscCall(VecRestoreArrayRead(bb, &b));
1180   PetscCall(VecRestoreArrayWrite(xx, &x));
1181   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1186 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1187 {
1188   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1189   PetscInt           n  = A->rmap->n;
1190   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1191   PetscScalar       *x;
1192   const PetscScalar *b;
1193   const MatScalar   *aa = a->a;
1194 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1195   PetscInt         adiag_i, i, nz, ai_i;
1196   const PetscInt  *vi;
1197   const MatScalar *v;
1198   PetscScalar      sum;
1199 #endif
1200 
1201   PetscFunctionBegin;
1202   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1203 
1204   PetscCall(VecGetArrayRead(bb, &b));
1205   PetscCall(VecGetArrayWrite(xx, &x));
1206 
1207 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1208   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1209 #else
1210   /* forward solve the lower triangular */
1211   x[0] = b[0];
1212   for (i = 1; i < n; i++) {
1213     ai_i = ai[i];
1214     v    = aa + ai_i;
1215     vi   = aj + ai_i;
1216     nz   = adiag[i] - ai_i;
1217     sum  = b[i];
1218     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1219     x[i] = sum;
1220   }
1221 
1222   /* backward solve the upper triangular */
1223   for (i = n - 1; i >= 0; i--) {
1224     adiag_i = adiag[i];
1225     v       = aa + adiag_i + 1;
1226     vi      = aj + adiag_i + 1;
1227     nz      = ai[i + 1] - adiag_i - 1;
1228     sum     = x[i];
1229     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1230     x[i] = sum * aa[adiag_i];
1231   }
1232 #endif
1233   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1234   PetscCall(VecRestoreArrayRead(bb, &b));
1235   PetscCall(VecRestoreArrayWrite(xx, &x));
1236   PetscFunctionReturn(PETSC_SUCCESS);
1237 }
1238 
1239 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1240 {
1241   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1242   IS                 iscol = a->col, isrow = a->row;
1243   PetscInt           i, n                  = A->rmap->n, j;
1244   PetscInt           nz;
1245   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1246   PetscScalar       *x, *tmp, sum;
1247   const PetscScalar *b;
1248   const MatScalar   *aa = a->a, *v;
1249 
1250   PetscFunctionBegin;
1251   if (yy != xx) PetscCall(VecCopy(yy, xx));
1252 
1253   PetscCall(VecGetArrayRead(bb, &b));
1254   PetscCall(VecGetArray(xx, &x));
1255   tmp = a->solve_work;
1256 
1257   PetscCall(ISGetIndices(isrow, &rout));
1258   r = rout;
1259   PetscCall(ISGetIndices(iscol, &cout));
1260   c = cout + (n - 1);
1261 
1262   /* forward solve the lower triangular */
1263   tmp[0] = b[*r++];
1264   for (i = 1; i < n; i++) {
1265     v   = aa + ai[i];
1266     vi  = aj + ai[i];
1267     nz  = a->diag[i] - ai[i];
1268     sum = b[*r++];
1269     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1270     tmp[i] = sum;
1271   }
1272 
1273   /* backward solve the upper triangular */
1274   for (i = n - 1; i >= 0; i--) {
1275     v   = aa + a->diag[i] + 1;
1276     vi  = aj + a->diag[i] + 1;
1277     nz  = ai[i + 1] - a->diag[i] - 1;
1278     sum = tmp[i];
1279     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1280     tmp[i] = sum * aa[a->diag[i]];
1281     x[*c--] += tmp[i];
1282   }
1283 
1284   PetscCall(ISRestoreIndices(isrow, &rout));
1285   PetscCall(ISRestoreIndices(iscol, &cout));
1286   PetscCall(VecRestoreArrayRead(bb, &b));
1287   PetscCall(VecRestoreArray(xx, &x));
1288   PetscCall(PetscLogFlops(2.0 * a->nz));
1289   PetscFunctionReturn(PETSC_SUCCESS);
1290 }
1291 
1292 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1293 {
1294   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1295   IS                 iscol = a->col, isrow = a->row;
1296   PetscInt           i, n                  = A->rmap->n, j;
1297   PetscInt           nz;
1298   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1299   PetscScalar       *x, *tmp, sum;
1300   const PetscScalar *b;
1301   const MatScalar   *aa = a->a, *v;
1302 
1303   PetscFunctionBegin;
1304   if (yy != xx) PetscCall(VecCopy(yy, xx));
1305 
1306   PetscCall(VecGetArrayRead(bb, &b));
1307   PetscCall(VecGetArray(xx, &x));
1308   tmp = a->solve_work;
1309 
1310   PetscCall(ISGetIndices(isrow, &rout));
1311   r = rout;
1312   PetscCall(ISGetIndices(iscol, &cout));
1313   c = cout;
1314 
1315   /* forward solve the lower triangular */
1316   tmp[0] = b[r[0]];
1317   v      = aa;
1318   vi     = aj;
1319   for (i = 1; i < n; i++) {
1320     nz  = ai[i + 1] - ai[i];
1321     sum = b[r[i]];
1322     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1323     tmp[i] = sum;
1324     v += nz;
1325     vi += nz;
1326   }
1327 
1328   /* backward solve the upper triangular */
1329   v  = aa + adiag[n - 1];
1330   vi = aj + adiag[n - 1];
1331   for (i = n - 1; i >= 0; i--) {
1332     nz  = adiag[i] - adiag[i + 1] - 1;
1333     sum = tmp[i];
1334     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1335     tmp[i] = sum * v[nz];
1336     x[c[i]] += tmp[i];
1337     v += nz + 1;
1338     vi += nz + 1;
1339   }
1340 
1341   PetscCall(ISRestoreIndices(isrow, &rout));
1342   PetscCall(ISRestoreIndices(iscol, &cout));
1343   PetscCall(VecRestoreArrayRead(bb, &b));
1344   PetscCall(VecRestoreArray(xx, &x));
1345   PetscCall(PetscLogFlops(2.0 * a->nz));
1346   PetscFunctionReturn(PETSC_SUCCESS);
1347 }
1348 
1349 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1350 {
1351   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1352   IS                 iscol = a->col, isrow = a->row;
1353   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1354   PetscInt           i, n = A->rmap->n, j;
1355   PetscInt           nz;
1356   PetscScalar       *x, *tmp, s1;
1357   const MatScalar   *aa = a->a, *v;
1358   const PetscScalar *b;
1359 
1360   PetscFunctionBegin;
1361   PetscCall(VecGetArrayRead(bb, &b));
1362   PetscCall(VecGetArrayWrite(xx, &x));
1363   tmp = a->solve_work;
1364 
1365   PetscCall(ISGetIndices(isrow, &rout));
1366   r = rout;
1367   PetscCall(ISGetIndices(iscol, &cout));
1368   c = cout;
1369 
1370   /* copy the b into temp work space according to permutation */
1371   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1372 
1373   /* forward solve the U^T */
1374   for (i = 0; i < n; i++) {
1375     v  = aa + diag[i];
1376     vi = aj + diag[i] + 1;
1377     nz = ai[i + 1] - diag[i] - 1;
1378     s1 = tmp[i];
1379     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1380     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1381     tmp[i] = s1;
1382   }
1383 
1384   /* backward solve the L^T */
1385   for (i = n - 1; i >= 0; i--) {
1386     v  = aa + diag[i] - 1;
1387     vi = aj + diag[i] - 1;
1388     nz = diag[i] - ai[i];
1389     s1 = tmp[i];
1390     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1391   }
1392 
1393   /* copy tmp into x according to permutation */
1394   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1395 
1396   PetscCall(ISRestoreIndices(isrow, &rout));
1397   PetscCall(ISRestoreIndices(iscol, &cout));
1398   PetscCall(VecRestoreArrayRead(bb, &b));
1399   PetscCall(VecRestoreArrayWrite(xx, &x));
1400 
1401   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1402   PetscFunctionReturn(PETSC_SUCCESS);
1403 }
1404 
1405 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1406 {
1407   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1408   IS                 iscol = a->col, isrow = a->row;
1409   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1410   PetscInt           i, n = A->rmap->n, j;
1411   PetscInt           nz;
1412   PetscScalar       *x, *tmp, s1;
1413   const MatScalar   *aa = a->a, *v;
1414   const PetscScalar *b;
1415 
1416   PetscFunctionBegin;
1417   PetscCall(VecGetArrayRead(bb, &b));
1418   PetscCall(VecGetArrayWrite(xx, &x));
1419   tmp = a->solve_work;
1420 
1421   PetscCall(ISGetIndices(isrow, &rout));
1422   r = rout;
1423   PetscCall(ISGetIndices(iscol, &cout));
1424   c = cout;
1425 
1426   /* copy the b into temp work space according to permutation */
1427   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1428 
1429   /* forward solve the U^T */
1430   for (i = 0; i < n; i++) {
1431     v  = aa + adiag[i + 1] + 1;
1432     vi = aj + adiag[i + 1] + 1;
1433     nz = adiag[i] - adiag[i + 1] - 1;
1434     s1 = tmp[i];
1435     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1436     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1437     tmp[i] = s1;
1438   }
1439 
1440   /* backward solve the L^T */
1441   for (i = n - 1; i >= 0; i--) {
1442     v  = aa + ai[i];
1443     vi = aj + ai[i];
1444     nz = ai[i + 1] - ai[i];
1445     s1 = tmp[i];
1446     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1447   }
1448 
1449   /* copy tmp into x according to permutation */
1450   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1451 
1452   PetscCall(ISRestoreIndices(isrow, &rout));
1453   PetscCall(ISRestoreIndices(iscol, &cout));
1454   PetscCall(VecRestoreArrayRead(bb, &b));
1455   PetscCall(VecRestoreArrayWrite(xx, &x));
1456 
1457   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1458   PetscFunctionReturn(PETSC_SUCCESS);
1459 }
1460 
1461 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1462 {
1463   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1464   IS                 iscol = a->col, isrow = a->row;
1465   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1466   PetscInt           i, n = A->rmap->n, j;
1467   PetscInt           nz;
1468   PetscScalar       *x, *tmp, s1;
1469   const MatScalar   *aa = a->a, *v;
1470   const PetscScalar *b;
1471 
1472   PetscFunctionBegin;
1473   if (zz != xx) PetscCall(VecCopy(zz, xx));
1474   PetscCall(VecGetArrayRead(bb, &b));
1475   PetscCall(VecGetArray(xx, &x));
1476   tmp = a->solve_work;
1477 
1478   PetscCall(ISGetIndices(isrow, &rout));
1479   r = rout;
1480   PetscCall(ISGetIndices(iscol, &cout));
1481   c = cout;
1482 
1483   /* copy the b into temp work space according to permutation */
1484   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1485 
1486   /* forward solve the U^T */
1487   for (i = 0; i < n; i++) {
1488     v  = aa + diag[i];
1489     vi = aj + diag[i] + 1;
1490     nz = ai[i + 1] - diag[i] - 1;
1491     s1 = tmp[i];
1492     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1493     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1494     tmp[i] = s1;
1495   }
1496 
1497   /* backward solve the L^T */
1498   for (i = n - 1; i >= 0; i--) {
1499     v  = aa + diag[i] - 1;
1500     vi = aj + diag[i] - 1;
1501     nz = diag[i] - ai[i];
1502     s1 = tmp[i];
1503     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1504   }
1505 
1506   /* copy tmp into x according to permutation */
1507   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1508 
1509   PetscCall(ISRestoreIndices(isrow, &rout));
1510   PetscCall(ISRestoreIndices(iscol, &cout));
1511   PetscCall(VecRestoreArrayRead(bb, &b));
1512   PetscCall(VecRestoreArray(xx, &x));
1513 
1514   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1515   PetscFunctionReturn(PETSC_SUCCESS);
1516 }
1517 
1518 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1519 {
1520   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1521   IS                 iscol = a->col, isrow = a->row;
1522   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1523   PetscInt           i, n = A->rmap->n, j;
1524   PetscInt           nz;
1525   PetscScalar       *x, *tmp, s1;
1526   const MatScalar   *aa = a->a, *v;
1527   const PetscScalar *b;
1528 
1529   PetscFunctionBegin;
1530   if (zz != xx) PetscCall(VecCopy(zz, xx));
1531   PetscCall(VecGetArrayRead(bb, &b));
1532   PetscCall(VecGetArray(xx, &x));
1533   tmp = a->solve_work;
1534 
1535   PetscCall(ISGetIndices(isrow, &rout));
1536   r = rout;
1537   PetscCall(ISGetIndices(iscol, &cout));
1538   c = cout;
1539 
1540   /* copy the b into temp work space according to permutation */
1541   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1542 
1543   /* forward solve the U^T */
1544   for (i = 0; i < n; i++) {
1545     v  = aa + adiag[i + 1] + 1;
1546     vi = aj + adiag[i + 1] + 1;
1547     nz = adiag[i] - adiag[i + 1] - 1;
1548     s1 = tmp[i];
1549     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1550     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1551     tmp[i] = s1;
1552   }
1553 
1554   /* backward solve the L^T */
1555   for (i = n - 1; i >= 0; i--) {
1556     v  = aa + ai[i];
1557     vi = aj + ai[i];
1558     nz = ai[i + 1] - ai[i];
1559     s1 = tmp[i];
1560     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1561   }
1562 
1563   /* copy tmp into x according to permutation */
1564   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1565 
1566   PetscCall(ISRestoreIndices(isrow, &rout));
1567   PetscCall(ISRestoreIndices(iscol, &cout));
1568   PetscCall(VecRestoreArrayRead(bb, &b));
1569   PetscCall(VecRestoreArray(xx, &x));
1570 
1571   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1572   PetscFunctionReturn(PETSC_SUCCESS);
1573 }
1574 
1575 /*
1576    ilu() under revised new data structure.
1577    Factored arrays bj and ba are stored as
1578      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1579 
1580    bi=fact->i is an array of size n+1, in which
1581      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1582      bi[n]:  points to L(n-1,n-1)+1
1583 
1584   bdiag=fact->diag is an array of size n+1,in which
1585      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1586      bdiag[n]: points to entry of U(n-1,0)-1
1587 
1588    U(i,:) contains bdiag[i] as its last entry, i.e.,
1589     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1590 */
1591 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1592 {
1593   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1594   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1595   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1596   IS             isicol;
1597 
1598   PetscFunctionBegin;
1599   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1600   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1601   b = (Mat_SeqAIJ *)(fact)->data;
1602 
1603   /* allocate matrix arrays for new data structure */
1604   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1605 
1606   b->singlemalloc = PETSC_TRUE;
1607   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1608   bdiag = b->diag;
1609 
1610   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1611 
1612   /* set bi and bj with new data structure */
1613   bi = b->i;
1614   bj = b->j;
1615 
1616   /* L part */
1617   bi[0] = 0;
1618   for (i = 0; i < n; i++) {
1619     nz        = adiag[i] - ai[i];
1620     bi[i + 1] = bi[i] + nz;
1621     aj        = a->j + ai[i];
1622     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1623   }
1624 
1625   /* U part */
1626   bdiag[n] = bi[n] - 1;
1627   for (i = n - 1; i >= 0; i--) {
1628     nz = ai[i + 1] - adiag[i] - 1;
1629     aj = a->j + adiag[i] + 1;
1630     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1631     /* diag[i] */
1632     bj[k++]  = i;
1633     bdiag[i] = bdiag[i + 1] + nz + 1;
1634   }
1635 
1636   fact->factortype             = MAT_FACTOR_ILU;
1637   fact->info.factor_mallocs    = 0;
1638   fact->info.fill_ratio_given  = info->fill;
1639   fact->info.fill_ratio_needed = 1.0;
1640   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1641   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1642 
1643   b       = (Mat_SeqAIJ *)(fact)->data;
1644   b->row  = isrow;
1645   b->col  = iscol;
1646   b->icol = isicol;
1647   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1648   PetscCall(PetscObjectReference((PetscObject)isrow));
1649   PetscCall(PetscObjectReference((PetscObject)iscol));
1650   PetscFunctionReturn(PETSC_SUCCESS);
1651 }
1652 
1653 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1654 {
1655   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1656   IS                 isicol;
1657   const PetscInt    *r, *ic;
1658   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1659   PetscInt          *bi, *cols, nnz, *cols_lvl;
1660   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1661   PetscInt           i, levels, diagonal_fill;
1662   PetscBool          col_identity, row_identity, missing;
1663   PetscReal          f;
1664   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1665   PetscBT            lnkbt;
1666   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1667   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1668   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1669 
1670   PetscFunctionBegin;
1671   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1672   PetscCall(MatMissingDiagonal(A, &missing, &i));
1673   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1674 
1675   levels = (PetscInt)info->levels;
1676   PetscCall(ISIdentity(isrow, &row_identity));
1677   PetscCall(ISIdentity(iscol, &col_identity));
1678   if (!levels && row_identity && col_identity) {
1679     /* special case: ilu(0) with natural ordering */
1680     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1681     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1682     PetscFunctionReturn(PETSC_SUCCESS);
1683   }
1684 
1685   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1686   PetscCall(ISGetIndices(isrow, &r));
1687   PetscCall(ISGetIndices(isicol, &ic));
1688 
1689   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1690   PetscCall(PetscMalloc1(n + 1, &bi));
1691   PetscCall(PetscMalloc1(n + 1, &bdiag));
1692   bi[0] = bdiag[0] = 0;
1693   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1694 
1695   /* create a linked list for storing column indices of the active row */
1696   nlnk = n + 1;
1697   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1698 
1699   /* initial FreeSpace size is f*(ai[n]+1) */
1700   f             = info->fill;
1701   diagonal_fill = (PetscInt)info->diagonal_fill;
1702   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1703   current_space = free_space;
1704   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1705   current_space_lvl = free_space_lvl;
1706   for (i = 0; i < n; i++) {
1707     nzi = 0;
1708     /* copy current row into linked list */
1709     nnz = ai[r[i] + 1] - ai[r[i]];
1710     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1711     cols   = aj + ai[r[i]];
1712     lnk[i] = -1; /* marker to indicate if diagonal exists */
1713     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1714     nzi += nlnk;
1715 
1716     /* make sure diagonal entry is included */
1717     if (diagonal_fill && lnk[i] == -1) {
1718       fm = n;
1719       while (lnk[fm] < i) fm = lnk[fm];
1720       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1721       lnk[fm]    = i;
1722       lnk_lvl[i] = 0;
1723       nzi++;
1724       dcount++;
1725     }
1726 
1727     /* add pivot rows into the active row */
1728     nzbd = 0;
1729     prow = lnk[n];
1730     while (prow < i) {
1731       nnz      = bdiag[prow];
1732       cols     = bj_ptr[prow] + nnz + 1;
1733       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1734       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1735       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1736       nzi += nlnk;
1737       prow = lnk[prow];
1738       nzbd++;
1739     }
1740     bdiag[i]  = nzbd;
1741     bi[i + 1] = bi[i] + nzi;
1742     /* if free space is not available, make more free space */
1743     if (current_space->local_remaining < nzi) {
1744       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1745       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1746       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1747       reallocs++;
1748     }
1749 
1750     /* copy data into free_space and free_space_lvl, then initialize lnk */
1751     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1752     bj_ptr[i]    = current_space->array;
1753     bjlvl_ptr[i] = current_space_lvl->array;
1754 
1755     /* make sure the active row i has diagonal entry */
1756     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1757 
1758     current_space->array += nzi;
1759     current_space->local_used += nzi;
1760     current_space->local_remaining -= nzi;
1761     current_space_lvl->array += nzi;
1762     current_space_lvl->local_used += nzi;
1763     current_space_lvl->local_remaining -= nzi;
1764   }
1765 
1766   PetscCall(ISRestoreIndices(isrow, &r));
1767   PetscCall(ISRestoreIndices(isicol, &ic));
1768   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1769   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1770   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1771 
1772   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1773   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1774   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1775 
1776 #if defined(PETSC_USE_INFO)
1777   {
1778     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1779     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1780     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1781     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1782     PetscCall(PetscInfo(A, "for best performance.\n"));
1783     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1784   }
1785 #endif
1786   /* put together the new matrix */
1787   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1788   b = (Mat_SeqAIJ *)(fact)->data;
1789 
1790   b->free_a       = PETSC_TRUE;
1791   b->free_ij      = PETSC_TRUE;
1792   b->singlemalloc = PETSC_FALSE;
1793 
1794   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1795 
1796   b->j    = bj;
1797   b->i    = bi;
1798   b->diag = bdiag;
1799   b->ilen = NULL;
1800   b->imax = NULL;
1801   b->row  = isrow;
1802   b->col  = iscol;
1803   PetscCall(PetscObjectReference((PetscObject)isrow));
1804   PetscCall(PetscObjectReference((PetscObject)iscol));
1805   b->icol = isicol;
1806 
1807   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1808   /* In b structure:  Free imax, ilen, old a, old j.
1809      Allocate bdiag, solve_work, new a, new j */
1810   b->maxnz = b->nz = bdiag[0] + 1;
1811 
1812   (fact)->info.factor_mallocs    = reallocs;
1813   (fact)->info.fill_ratio_given  = f;
1814   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1815   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1816   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1817   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
1821 #if 0
1822 // unused
1823 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1824 {
1825   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1826   IS                 isicol;
1827   const PetscInt    *r, *ic;
1828   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1829   PetscInt          *bi, *cols, nnz, *cols_lvl;
1830   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1831   PetscInt           i, levels, diagonal_fill;
1832   PetscBool          col_identity, row_identity;
1833   PetscReal          f;
1834   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1835   PetscBT            lnkbt;
1836   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1837   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1838   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1839   PetscBool          missing;
1840 
1841   PetscFunctionBegin;
1842   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1843   PetscCall(MatMissingDiagonal(A, &missing, &i));
1844   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1845 
1846   f             = info->fill;
1847   levels        = (PetscInt)info->levels;
1848   diagonal_fill = (PetscInt)info->diagonal_fill;
1849 
1850   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1851 
1852   PetscCall(ISIdentity(isrow, &row_identity));
1853   PetscCall(ISIdentity(iscol, &col_identity));
1854   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1855     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1856 
1857     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1858     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1859     fact->factortype               = MAT_FACTOR_ILU;
1860     (fact)->info.factor_mallocs    = 0;
1861     (fact)->info.fill_ratio_given  = info->fill;
1862     (fact)->info.fill_ratio_needed = 1.0;
1863 
1864     b       = (Mat_SeqAIJ *)(fact)->data;
1865     b->row  = isrow;
1866     b->col  = iscol;
1867     b->icol = isicol;
1868     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1869     PetscCall(PetscObjectReference((PetscObject)isrow));
1870     PetscCall(PetscObjectReference((PetscObject)iscol));
1871     PetscFunctionReturn(PETSC_SUCCESS);
1872   }
1873 
1874   PetscCall(ISGetIndices(isrow, &r));
1875   PetscCall(ISGetIndices(isicol, &ic));
1876 
1877   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1878   PetscCall(PetscMalloc1(n + 1, &bi));
1879   PetscCall(PetscMalloc1(n + 1, &bdiag));
1880   bi[0] = bdiag[0] = 0;
1881 
1882   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1883 
1884   /* create a linked list for storing column indices of the active row */
1885   nlnk = n + 1;
1886   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1887 
1888   /* initial FreeSpace size is f*(ai[n]+1) */
1889   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1890   current_space = free_space;
1891   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1892   current_space_lvl = free_space_lvl;
1893 
1894   for (i = 0; i < n; i++) {
1895     nzi = 0;
1896     /* copy current row into linked list */
1897     nnz = ai[r[i] + 1] - ai[r[i]];
1898     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1899     cols   = aj + ai[r[i]];
1900     lnk[i] = -1; /* marker to indicate if diagonal exists */
1901     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1902     nzi += nlnk;
1903 
1904     /* make sure diagonal entry is included */
1905     if (diagonal_fill && lnk[i] == -1) {
1906       fm = n;
1907       while (lnk[fm] < i) fm = lnk[fm];
1908       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1909       lnk[fm]    = i;
1910       lnk_lvl[i] = 0;
1911       nzi++;
1912       dcount++;
1913     }
1914 
1915     /* add pivot rows into the active row */
1916     nzbd = 0;
1917     prow = lnk[n];
1918     while (prow < i) {
1919       nnz      = bdiag[prow];
1920       cols     = bj_ptr[prow] + nnz + 1;
1921       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1922       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1923       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1924       nzi += nlnk;
1925       prow = lnk[prow];
1926       nzbd++;
1927     }
1928     bdiag[i]  = nzbd;
1929     bi[i + 1] = bi[i] + nzi;
1930 
1931     /* if free space is not available, make more free space */
1932     if (current_space->local_remaining < nzi) {
1933       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1934       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1935       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1936       reallocs++;
1937     }
1938 
1939     /* copy data into free_space and free_space_lvl, then initialize lnk */
1940     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1941     bj_ptr[i]    = current_space->array;
1942     bjlvl_ptr[i] = current_space_lvl->array;
1943 
1944     /* make sure the active row i has diagonal entry */
1945     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1946 
1947     current_space->array += nzi;
1948     current_space->local_used += nzi;
1949     current_space->local_remaining -= nzi;
1950     current_space_lvl->array += nzi;
1951     current_space_lvl->local_used += nzi;
1952     current_space_lvl->local_remaining -= nzi;
1953   }
1954 
1955   PetscCall(ISRestoreIndices(isrow, &r));
1956   PetscCall(ISRestoreIndices(isicol, &ic));
1957 
1958   /* destroy list of free space and other temporary arrays */
1959   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1960   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1961   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1962   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1963   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1964 
1965   #if defined(PETSC_USE_INFO)
1966   {
1967     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1968     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1969     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1970     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1971     PetscCall(PetscInfo(A, "for best performance.\n"));
1972     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1973   }
1974   #endif
1975 
1976   /* put together the new matrix */
1977   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1978   b = (Mat_SeqAIJ *)(fact)->data;
1979 
1980   b->free_a       = PETSC_TRUE;
1981   b->free_ij      = PETSC_TRUE;
1982   b->singlemalloc = PETSC_FALSE;
1983 
1984   PetscCall(PetscMalloc1(bi[n], &b->a));
1985   b->j = bj;
1986   b->i = bi;
1987   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1988   b->diag = bdiag;
1989   b->ilen = NULL;
1990   b->imax = NULL;
1991   b->row  = isrow;
1992   b->col  = iscol;
1993   PetscCall(PetscObjectReference((PetscObject)isrow));
1994   PetscCall(PetscObjectReference((PetscObject)iscol));
1995   b->icol = isicol;
1996   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1997   /* In b structure:  Free imax, ilen, old a, old j.
1998      Allocate bdiag, solve_work, new a, new j */
1999   b->maxnz = b->nz = bi[n];
2000 
2001   (fact)->info.factor_mallocs    = reallocs;
2002   (fact)->info.fill_ratio_given  = f;
2003   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2004   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2005   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2006   PetscFunctionReturn(PETSC_SUCCESS);
2007 }
2008 #endif
2009 
2010 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2011 {
2012   Mat             C  = B;
2013   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2014   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2015   IS              ip = b->row, iip = b->icol;
2016   const PetscInt *rip, *riip;
2017   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2018   PetscInt       *ai = a->i, *aj = a->j;
2019   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2020   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2021   PetscBool       perm_identity;
2022   FactorShiftCtx  sctx;
2023   PetscReal       rs;
2024   MatScalar       d, *v;
2025 
2026   PetscFunctionBegin;
2027   /* MatPivotSetUp(): initialize shift context sctx */
2028   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2029 
2030   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2031     sctx.shift_top = info->zeropivot;
2032     for (i = 0; i < mbs; i++) {
2033       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2034       d  = (aa)[a->diag[i]];
2035       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2036       v  = aa + ai[i];
2037       nz = ai[i + 1] - ai[i];
2038       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2039       if (rs > sctx.shift_top) sctx.shift_top = rs;
2040     }
2041     sctx.shift_top *= 1.1;
2042     sctx.nshift_max = 5;
2043     sctx.shift_lo   = 0.;
2044     sctx.shift_hi   = 1.;
2045   }
2046 
2047   PetscCall(ISGetIndices(ip, &rip));
2048   PetscCall(ISGetIndices(iip, &riip));
2049 
2050   /* allocate working arrays
2051      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2052      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2053   */
2054   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2055 
2056   do {
2057     sctx.newshift = PETSC_FALSE;
2058 
2059     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2060     if (mbs) il[0] = 0;
2061 
2062     for (k = 0; k < mbs; k++) {
2063       /* zero rtmp */
2064       nz    = bi[k + 1] - bi[k];
2065       bjtmp = bj + bi[k];
2066       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2067 
2068       /* load in initial unfactored row */
2069       bval = ba + bi[k];
2070       jmin = ai[rip[k]];
2071       jmax = ai[rip[k] + 1];
2072       for (j = jmin; j < jmax; j++) {
2073         col = riip[aj[j]];
2074         if (col >= k) { /* only take upper triangular entry */
2075           rtmp[col] = aa[j];
2076           *bval++   = 0.0; /* for in-place factorization */
2077         }
2078       }
2079       /* shift the diagonal of the matrix: ZeropivotApply() */
2080       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2081 
2082       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2083       dk = rtmp[k];
2084       i  = c2r[k]; /* first row to be added to k_th row  */
2085 
2086       while (i < k) {
2087         nexti = c2r[i]; /* next row to be added to k_th row */
2088 
2089         /* compute multiplier, update diag(k) and U(i,k) */
2090         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2091         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2092         dk += uikdi * ba[ili];           /* update diag[k] */
2093         ba[ili] = uikdi;                 /* -U(i,k) */
2094 
2095         /* add multiple of row i to k-th row */
2096         jmin = ili + 1;
2097         jmax = bi[i + 1];
2098         if (jmin < jmax) {
2099           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2100           /* update il and c2r for row i */
2101           il[i]  = jmin;
2102           j      = bj[jmin];
2103           c2r[i] = c2r[j];
2104           c2r[j] = i;
2105         }
2106         i = nexti;
2107       }
2108 
2109       /* copy data into U(k,:) */
2110       rs   = 0.0;
2111       jmin = bi[k];
2112       jmax = bi[k + 1] - 1;
2113       if (jmin < jmax) {
2114         for (j = jmin; j < jmax; j++) {
2115           col   = bj[j];
2116           ba[j] = rtmp[col];
2117           rs += PetscAbsScalar(ba[j]);
2118         }
2119         /* add the k-th row into il and c2r */
2120         il[k]  = jmin;
2121         i      = bj[jmin];
2122         c2r[k] = c2r[i];
2123         c2r[i] = k;
2124       }
2125 
2126       /* MatPivotCheck() */
2127       sctx.rs = rs;
2128       sctx.pv = dk;
2129       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2130       if (sctx.newshift) break;
2131       dk = sctx.pv;
2132 
2133       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2134     }
2135   } while (sctx.newshift);
2136 
2137   PetscCall(PetscFree3(rtmp, il, c2r));
2138   PetscCall(ISRestoreIndices(ip, &rip));
2139   PetscCall(ISRestoreIndices(iip, &riip));
2140 
2141   PetscCall(ISIdentity(ip, &perm_identity));
2142   if (perm_identity) {
2143     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2144     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2145     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2146     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2147   } else {
2148     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2149     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2150     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2151     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2152   }
2153 
2154   C->assembled    = PETSC_TRUE;
2155   C->preallocated = PETSC_TRUE;
2156 
2157   PetscCall(PetscLogFlops(C->rmap->n));
2158 
2159   /* MatPivotView() */
2160   if (sctx.nshift) {
2161     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2162       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2163     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2164       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2165     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2166       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2167     }
2168   }
2169   PetscFunctionReturn(PETSC_SUCCESS);
2170 }
2171 
2172 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2173 {
2174   Mat             C  = B;
2175   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2176   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2177   IS              ip = b->row, iip = b->icol;
2178   const PetscInt *rip, *riip;
2179   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2180   PetscInt       *ai = a->i, *aj = a->j;
2181   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2182   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2183   PetscBool       perm_identity;
2184   FactorShiftCtx  sctx;
2185   PetscReal       rs;
2186   MatScalar       d, *v;
2187 
2188   PetscFunctionBegin;
2189   /* MatPivotSetUp(): initialize shift context sctx */
2190   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2191 
2192   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2193     sctx.shift_top = info->zeropivot;
2194     for (i = 0; i < mbs; i++) {
2195       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2196       d  = (aa)[a->diag[i]];
2197       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2198       v  = aa + ai[i];
2199       nz = ai[i + 1] - ai[i];
2200       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2201       if (rs > sctx.shift_top) sctx.shift_top = rs;
2202     }
2203     sctx.shift_top *= 1.1;
2204     sctx.nshift_max = 5;
2205     sctx.shift_lo   = 0.;
2206     sctx.shift_hi   = 1.;
2207   }
2208 
2209   PetscCall(ISGetIndices(ip, &rip));
2210   PetscCall(ISGetIndices(iip, &riip));
2211 
2212   /* initialization */
2213   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2214 
2215   do {
2216     sctx.newshift = PETSC_FALSE;
2217 
2218     for (i = 0; i < mbs; i++) jl[i] = mbs;
2219     il[0] = 0;
2220 
2221     for (k = 0; k < mbs; k++) {
2222       /* zero rtmp */
2223       nz    = bi[k + 1] - bi[k];
2224       bjtmp = bj + bi[k];
2225       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2226 
2227       bval = ba + bi[k];
2228       /* initialize k-th row by the perm[k]-th row of A */
2229       jmin = ai[rip[k]];
2230       jmax = ai[rip[k] + 1];
2231       for (j = jmin; j < jmax; j++) {
2232         col = riip[aj[j]];
2233         if (col >= k) { /* only take upper triangular entry */
2234           rtmp[col] = aa[j];
2235           *bval++   = 0.0; /* for in-place factorization */
2236         }
2237       }
2238       /* shift the diagonal of the matrix */
2239       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2240 
2241       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2242       dk = rtmp[k];
2243       i  = jl[k]; /* first row to be added to k_th row  */
2244 
2245       while (i < k) {
2246         nexti = jl[i]; /* next row to be added to k_th row */
2247 
2248         /* compute multiplier, update diag(k) and U(i,k) */
2249         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2250         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2251         dk += uikdi * ba[ili];
2252         ba[ili] = uikdi; /* -U(i,k) */
2253 
2254         /* add multiple of row i to k-th row */
2255         jmin = ili + 1;
2256         jmax = bi[i + 1];
2257         if (jmin < jmax) {
2258           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2259           /* update il and jl for row i */
2260           il[i] = jmin;
2261           j     = bj[jmin];
2262           jl[i] = jl[j];
2263           jl[j] = i;
2264         }
2265         i = nexti;
2266       }
2267 
2268       /* shift the diagonals when zero pivot is detected */
2269       /* compute rs=sum of abs(off-diagonal) */
2270       rs   = 0.0;
2271       jmin = bi[k] + 1;
2272       nz   = bi[k + 1] - jmin;
2273       bcol = bj + jmin;
2274       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2275 
2276       sctx.rs = rs;
2277       sctx.pv = dk;
2278       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2279       if (sctx.newshift) break;
2280       dk = sctx.pv;
2281 
2282       /* copy data into U(k,:) */
2283       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2284       jmin      = bi[k] + 1;
2285       jmax      = bi[k + 1];
2286       if (jmin < jmax) {
2287         for (j = jmin; j < jmax; j++) {
2288           col   = bj[j];
2289           ba[j] = rtmp[col];
2290         }
2291         /* add the k-th row into il and jl */
2292         il[k] = jmin;
2293         i     = bj[jmin];
2294         jl[k] = jl[i];
2295         jl[i] = k;
2296       }
2297     }
2298   } while (sctx.newshift);
2299 
2300   PetscCall(PetscFree3(rtmp, il, jl));
2301   PetscCall(ISRestoreIndices(ip, &rip));
2302   PetscCall(ISRestoreIndices(iip, &riip));
2303 
2304   PetscCall(ISIdentity(ip, &perm_identity));
2305   if (perm_identity) {
2306     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2307     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2308     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2309     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2310   } else {
2311     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2312     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2313     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2314     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2315   }
2316 
2317   C->assembled    = PETSC_TRUE;
2318   C->preallocated = PETSC_TRUE;
2319 
2320   PetscCall(PetscLogFlops(C->rmap->n));
2321   if (sctx.nshift) {
2322     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2323       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2324     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2325       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2326     }
2327   }
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 /*
2332    icc() under revised new data structure.
2333    Factored arrays bj and ba are stored as
2334      U(0,:),...,U(i,:),U(n-1,:)
2335 
2336    ui=fact->i is an array of size n+1, in which
2337    ui+
2338      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2339      ui[n]:  points to U(n-1,n-1)+1
2340 
2341   udiag=fact->diag is an array of size n,in which
2342      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2343 
2344    U(i,:) contains udiag[i] as its last entry, i.e.,
2345     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2346 */
2347 
2348 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2349 {
2350   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2351   Mat_SeqSBAIJ      *b;
2352   PetscBool          perm_identity, missing;
2353   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2354   const PetscInt    *rip, *riip;
2355   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2356   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2357   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2358   PetscReal          fill = info->fill, levels = info->levels;
2359   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2360   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2361   PetscBT            lnkbt;
2362   IS                 iperm;
2363 
2364   PetscFunctionBegin;
2365   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2366   PetscCall(MatMissingDiagonal(A, &missing, &d));
2367   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2368   PetscCall(ISIdentity(perm, &perm_identity));
2369   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2370 
2371   PetscCall(PetscMalloc1(am + 1, &ui));
2372   PetscCall(PetscMalloc1(am + 1, &udiag));
2373   ui[0] = 0;
2374 
2375   /* ICC(0) without matrix ordering: simply rearrange column indices */
2376   if (!levels && perm_identity) {
2377     for (i = 0; i < am; i++) {
2378       ncols     = ai[i + 1] - a->diag[i];
2379       ui[i + 1] = ui[i] + ncols;
2380       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2381     }
2382     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2383     cols = uj;
2384     for (i = 0; i < am; i++) {
2385       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2386       ncols = ai[i + 1] - a->diag[i] - 1;
2387       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2388       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2389     }
2390   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2391     PetscCall(ISGetIndices(iperm, &riip));
2392     PetscCall(ISGetIndices(perm, &rip));
2393 
2394     /* initialization */
2395     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2396 
2397     /* jl: linked list for storing indices of the pivot rows
2398        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2399     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2400     for (i = 0; i < am; i++) {
2401       jl[i] = am;
2402       il[i] = 0;
2403     }
2404 
2405     /* create and initialize a linked list for storing column indices of the active row k */
2406     nlnk = am + 1;
2407     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2408 
2409     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2410     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2411     current_space = free_space;
2412     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2413     current_space_lvl = free_space_lvl;
2414 
2415     for (k = 0; k < am; k++) { /* for each active row k */
2416       /* initialize lnk by the column indices of row rip[k] of A */
2417       nzk   = 0;
2418       ncols = ai[rip[k] + 1] - ai[rip[k]];
2419       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2420       ncols_upper = 0;
2421       for (j = 0; j < ncols; j++) {
2422         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2423         if (riip[i] >= k) {         /* only take upper triangular entry */
2424           ajtmp[ncols_upper] = i;
2425           ncols_upper++;
2426         }
2427       }
2428       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2429       nzk += nlnk;
2430 
2431       /* update lnk by computing fill-in for each pivot row to be merged in */
2432       prow = jl[k]; /* 1st pivot row */
2433 
2434       while (prow < k) {
2435         nextprow = jl[prow];
2436 
2437         /* merge prow into k-th row */
2438         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2439         jmax  = ui[prow + 1];
2440         ncols = jmax - jmin;
2441         i     = jmin - ui[prow];
2442         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2443         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2444         j     = *(uj - 1);
2445         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2446         nzk += nlnk;
2447 
2448         /* update il and jl for prow */
2449         if (jmin < jmax) {
2450           il[prow] = jmin;
2451           j        = *cols;
2452           jl[prow] = jl[j];
2453           jl[j]    = prow;
2454         }
2455         prow = nextprow;
2456       }
2457 
2458       /* if free space is not available, make more free space */
2459       if (current_space->local_remaining < nzk) {
2460         i = am - k + 1;                                    /* num of unfactored rows */
2461         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2462         PetscCall(PetscFreeSpaceGet(i, &current_space));
2463         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2464         reallocs++;
2465       }
2466 
2467       /* copy data into free_space and free_space_lvl, then initialize lnk */
2468       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2469       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2470 
2471       /* add the k-th row into il and jl */
2472       if (nzk > 1) {
2473         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2474         jl[k] = jl[i];
2475         jl[i] = k;
2476         il[k] = ui[k] + 1;
2477       }
2478       uj_ptr[k]     = current_space->array;
2479       uj_lvl_ptr[k] = current_space_lvl->array;
2480 
2481       current_space->array += nzk;
2482       current_space->local_used += nzk;
2483       current_space->local_remaining -= nzk;
2484 
2485       current_space_lvl->array += nzk;
2486       current_space_lvl->local_used += nzk;
2487       current_space_lvl->local_remaining -= nzk;
2488 
2489       ui[k + 1] = ui[k] + nzk;
2490     }
2491 
2492     PetscCall(ISRestoreIndices(perm, &rip));
2493     PetscCall(ISRestoreIndices(iperm, &riip));
2494     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2495     PetscCall(PetscFree(ajtmp));
2496 
2497     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2498     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2499     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2500     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2501     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2502 
2503   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2504 
2505   /* put together the new matrix in MATSEQSBAIJ format */
2506   b               = (Mat_SeqSBAIJ *)(fact)->data;
2507   b->singlemalloc = PETSC_FALSE;
2508 
2509   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2510 
2511   b->j         = uj;
2512   b->i         = ui;
2513   b->diag      = udiag;
2514   b->free_diag = PETSC_TRUE;
2515   b->ilen      = NULL;
2516   b->imax      = NULL;
2517   b->row       = perm;
2518   b->col       = perm;
2519   PetscCall(PetscObjectReference((PetscObject)perm));
2520   PetscCall(PetscObjectReference((PetscObject)perm));
2521   b->icol          = iperm;
2522   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2523 
2524   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2525 
2526   b->maxnz = b->nz = ui[am];
2527   b->free_a        = PETSC_TRUE;
2528   b->free_ij       = PETSC_TRUE;
2529 
2530   fact->info.factor_mallocs   = reallocs;
2531   fact->info.fill_ratio_given = fill;
2532   if (ai[am] != 0) {
2533     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2534     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2535   } else {
2536     fact->info.fill_ratio_needed = 0.0;
2537   }
2538 #if defined(PETSC_USE_INFO)
2539   if (ai[am] != 0) {
2540     PetscReal af = fact->info.fill_ratio_needed;
2541     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2542     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2543     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2544   } else {
2545     PetscCall(PetscInfo(A, "Empty matrix\n"));
2546   }
2547 #endif
2548   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2549   PetscFunctionReturn(PETSC_SUCCESS);
2550 }
2551 
2552 #if 0
2553 // unused
2554 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2555 {
2556   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2557   Mat_SeqSBAIJ      *b;
2558   PetscBool          perm_identity, missing;
2559   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2560   const PetscInt    *rip, *riip;
2561   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2562   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2563   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2564   PetscReal          fill = info->fill, levels = info->levels;
2565   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2566   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2567   PetscBT            lnkbt;
2568   IS                 iperm;
2569 
2570   PetscFunctionBegin;
2571   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2572   PetscCall(MatMissingDiagonal(A, &missing, &d));
2573   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2574   PetscCall(ISIdentity(perm, &perm_identity));
2575   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2576 
2577   PetscCall(PetscMalloc1(am + 1, &ui));
2578   PetscCall(PetscMalloc1(am + 1, &udiag));
2579   ui[0] = 0;
2580 
2581   /* ICC(0) without matrix ordering: simply copies fill pattern */
2582   if (!levels && perm_identity) {
2583     for (i = 0; i < am; i++) {
2584       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2585       udiag[i]  = ui[i];
2586     }
2587     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2588     cols = uj;
2589     for (i = 0; i < am; i++) {
2590       aj    = a->j + a->diag[i];
2591       ncols = ui[i + 1] - ui[i];
2592       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2593     }
2594   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2595     PetscCall(ISGetIndices(iperm, &riip));
2596     PetscCall(ISGetIndices(perm, &rip));
2597 
2598     /* initialization */
2599     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2600 
2601     /* jl: linked list for storing indices of the pivot rows
2602        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2603     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2604     for (i = 0; i < am; i++) {
2605       jl[i] = am;
2606       il[i] = 0;
2607     }
2608 
2609     /* create and initialize a linked list for storing column indices of the active row k */
2610     nlnk = am + 1;
2611     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2612 
2613     /* initial FreeSpace size is fill*(ai[am]+1) */
2614     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2615     current_space = free_space;
2616     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2617     current_space_lvl = free_space_lvl;
2618 
2619     for (k = 0; k < am; k++) { /* for each active row k */
2620       /* initialize lnk by the column indices of row rip[k] of A */
2621       nzk   = 0;
2622       ncols = ai[rip[k] + 1] - ai[rip[k]];
2623       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2624       ncols_upper = 0;
2625       for (j = 0; j < ncols; j++) {
2626         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2627         if (riip[i] >= k) {         /* only take upper triangular entry */
2628           ajtmp[ncols_upper] = i;
2629           ncols_upper++;
2630         }
2631       }
2632       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2633       nzk += nlnk;
2634 
2635       /* update lnk by computing fill-in for each pivot row to be merged in */
2636       prow = jl[k]; /* 1st pivot row */
2637 
2638       while (prow < k) {
2639         nextprow = jl[prow];
2640 
2641         /* merge prow into k-th row */
2642         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2643         jmax  = ui[prow + 1];
2644         ncols = jmax - jmin;
2645         i     = jmin - ui[prow];
2646         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2647         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2648         j     = *(uj - 1);
2649         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2650         nzk += nlnk;
2651 
2652         /* update il and jl for prow */
2653         if (jmin < jmax) {
2654           il[prow] = jmin;
2655           j        = *cols;
2656           jl[prow] = jl[j];
2657           jl[j]    = prow;
2658         }
2659         prow = nextprow;
2660       }
2661 
2662       /* if free space is not available, make more free space */
2663       if (current_space->local_remaining < nzk) {
2664         i = am - k + 1;                                    /* num of unfactored rows */
2665         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2666         PetscCall(PetscFreeSpaceGet(i, &current_space));
2667         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2668         reallocs++;
2669       }
2670 
2671       /* copy data into free_space and free_space_lvl, then initialize lnk */
2672       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2673       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2674 
2675       /* add the k-th row into il and jl */
2676       if (nzk > 1) {
2677         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2678         jl[k] = jl[i];
2679         jl[i] = k;
2680         il[k] = ui[k] + 1;
2681       }
2682       uj_ptr[k]     = current_space->array;
2683       uj_lvl_ptr[k] = current_space_lvl->array;
2684 
2685       current_space->array += nzk;
2686       current_space->local_used += nzk;
2687       current_space->local_remaining -= nzk;
2688 
2689       current_space_lvl->array += nzk;
2690       current_space_lvl->local_used += nzk;
2691       current_space_lvl->local_remaining -= nzk;
2692 
2693       ui[k + 1] = ui[k] + nzk;
2694     }
2695 
2696   #if defined(PETSC_USE_INFO)
2697     if (ai[am] != 0) {
2698       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2699       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2700       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2701       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2702     } else {
2703       PetscCall(PetscInfo(A, "Empty matrix\n"));
2704     }
2705   #endif
2706 
2707     PetscCall(ISRestoreIndices(perm, &rip));
2708     PetscCall(ISRestoreIndices(iperm, &riip));
2709     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2710     PetscCall(PetscFree(ajtmp));
2711 
2712     /* destroy list of free space and other temporary array(s) */
2713     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2714     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2715     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2716     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2717 
2718   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2719 
2720   /* put together the new matrix in MATSEQSBAIJ format */
2721 
2722   b               = (Mat_SeqSBAIJ *)fact->data;
2723   b->singlemalloc = PETSC_FALSE;
2724 
2725   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2726 
2727   b->j         = uj;
2728   b->i         = ui;
2729   b->diag      = udiag;
2730   b->free_diag = PETSC_TRUE;
2731   b->ilen      = NULL;
2732   b->imax      = NULL;
2733   b->row       = perm;
2734   b->col       = perm;
2735 
2736   PetscCall(PetscObjectReference((PetscObject)perm));
2737   PetscCall(PetscObjectReference((PetscObject)perm));
2738 
2739   b->icol          = iperm;
2740   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2741   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2742   b->maxnz = b->nz = ui[am];
2743   b->free_a        = PETSC_TRUE;
2744   b->free_ij       = PETSC_TRUE;
2745 
2746   fact->info.factor_mallocs   = reallocs;
2747   fact->info.fill_ratio_given = fill;
2748   if (ai[am] != 0) {
2749     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2750   } else {
2751     fact->info.fill_ratio_needed = 0.0;
2752   }
2753   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2754   PetscFunctionReturn(PETSC_SUCCESS);
2755 }
2756 #endif
2757 
2758 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2759 {
2760   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2761   Mat_SeqSBAIJ      *b;
2762   PetscBool          perm_identity, missing;
2763   PetscReal          fill = info->fill;
2764   const PetscInt    *rip, *riip;
2765   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2766   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2767   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2768   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2769   PetscBT            lnkbt;
2770   IS                 iperm;
2771 
2772   PetscFunctionBegin;
2773   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2774   PetscCall(MatMissingDiagonal(A, &missing, &i));
2775   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2776 
2777   /* check whether perm is the identity mapping */
2778   PetscCall(ISIdentity(perm, &perm_identity));
2779   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2780   PetscCall(ISGetIndices(iperm, &riip));
2781   PetscCall(ISGetIndices(perm, &rip));
2782 
2783   /* initialization */
2784   PetscCall(PetscMalloc1(am + 1, &ui));
2785   PetscCall(PetscMalloc1(am + 1, &udiag));
2786   ui[0] = 0;
2787 
2788   /* jl: linked list for storing indices of the pivot rows
2789      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2790   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2791   for (i = 0; i < am; i++) {
2792     jl[i] = am;
2793     il[i] = 0;
2794   }
2795 
2796   /* create and initialize a linked list for storing column indices of the active row k */
2797   nlnk = am + 1;
2798   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2799 
2800   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2801   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2802   current_space = free_space;
2803 
2804   for (k = 0; k < am; k++) { /* for each active row k */
2805     /* initialize lnk by the column indices of row rip[k] of A */
2806     nzk   = 0;
2807     ncols = ai[rip[k] + 1] - ai[rip[k]];
2808     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2809     ncols_upper = 0;
2810     for (j = 0; j < ncols; j++) {
2811       i = riip[*(aj + ai[rip[k]] + j)];
2812       if (i >= k) { /* only take upper triangular entry */
2813         cols[ncols_upper] = i;
2814         ncols_upper++;
2815       }
2816     }
2817     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2818     nzk += nlnk;
2819 
2820     /* update lnk by computing fill-in for each pivot row to be merged in */
2821     prow = jl[k]; /* 1st pivot row */
2822 
2823     while (prow < k) {
2824       nextprow = jl[prow];
2825       /* merge prow into k-th row */
2826       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2827       jmax   = ui[prow + 1];
2828       ncols  = jmax - jmin;
2829       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2830       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2831       nzk += nlnk;
2832 
2833       /* update il and jl for prow */
2834       if (jmin < jmax) {
2835         il[prow] = jmin;
2836         j        = *uj_ptr;
2837         jl[prow] = jl[j];
2838         jl[j]    = prow;
2839       }
2840       prow = nextprow;
2841     }
2842 
2843     /* if free space is not available, make more free space */
2844     if (current_space->local_remaining < nzk) {
2845       i = am - k + 1;                                    /* num of unfactored rows */
2846       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2847       PetscCall(PetscFreeSpaceGet(i, &current_space));
2848       reallocs++;
2849     }
2850 
2851     /* copy data into free space, then initialize lnk */
2852     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2853 
2854     /* add the k-th row into il and jl */
2855     if (nzk > 1) {
2856       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2857       jl[k] = jl[i];
2858       jl[i] = k;
2859       il[k] = ui[k] + 1;
2860     }
2861     ui_ptr[k] = current_space->array;
2862 
2863     current_space->array += nzk;
2864     current_space->local_used += nzk;
2865     current_space->local_remaining -= nzk;
2866 
2867     ui[k + 1] = ui[k] + nzk;
2868   }
2869 
2870   PetscCall(ISRestoreIndices(perm, &rip));
2871   PetscCall(ISRestoreIndices(iperm, &riip));
2872   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2873 
2874   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2875   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2876   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2877   PetscCall(PetscLLDestroy(lnk, lnkbt));
2878 
2879   /* put together the new matrix in MATSEQSBAIJ format */
2880 
2881   b               = (Mat_SeqSBAIJ *)fact->data;
2882   b->singlemalloc = PETSC_FALSE;
2883   b->free_a       = PETSC_TRUE;
2884   b->free_ij      = PETSC_TRUE;
2885 
2886   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2887 
2888   b->j         = uj;
2889   b->i         = ui;
2890   b->diag      = udiag;
2891   b->free_diag = PETSC_TRUE;
2892   b->ilen      = NULL;
2893   b->imax      = NULL;
2894   b->row       = perm;
2895   b->col       = perm;
2896 
2897   PetscCall(PetscObjectReference((PetscObject)perm));
2898   PetscCall(PetscObjectReference((PetscObject)perm));
2899 
2900   b->icol          = iperm;
2901   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2902 
2903   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2904 
2905   b->maxnz = b->nz = ui[am];
2906 
2907   fact->info.factor_mallocs   = reallocs;
2908   fact->info.fill_ratio_given = fill;
2909   if (ai[am] != 0) {
2910     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2911     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2912   } else {
2913     fact->info.fill_ratio_needed = 0.0;
2914   }
2915 #if defined(PETSC_USE_INFO)
2916   if (ai[am] != 0) {
2917     PetscReal af = fact->info.fill_ratio_needed;
2918     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2919     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2920     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2921   } else {
2922     PetscCall(PetscInfo(A, "Empty matrix\n"));
2923   }
2924 #endif
2925   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2926   PetscFunctionReturn(PETSC_SUCCESS);
2927 }
2928 
2929 #if 0
2930 // unused
2931 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2932 {
2933   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2934   Mat_SeqSBAIJ      *b;
2935   PetscBool          perm_identity, missing;
2936   PetscReal          fill = info->fill;
2937   const PetscInt    *rip, *riip;
2938   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2939   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2940   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2941   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2942   PetscBT            lnkbt;
2943   IS                 iperm;
2944 
2945   PetscFunctionBegin;
2946   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2947   PetscCall(MatMissingDiagonal(A, &missing, &i));
2948   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2949 
2950   /* check whether perm is the identity mapping */
2951   PetscCall(ISIdentity(perm, &perm_identity));
2952   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2953   PetscCall(ISGetIndices(iperm, &riip));
2954   PetscCall(ISGetIndices(perm, &rip));
2955 
2956   /* initialization */
2957   PetscCall(PetscMalloc1(am + 1, &ui));
2958   ui[0] = 0;
2959 
2960   /* jl: linked list for storing indices of the pivot rows
2961      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2962   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2963   for (i = 0; i < am; i++) {
2964     jl[i] = am;
2965     il[i] = 0;
2966   }
2967 
2968   /* create and initialize a linked list for storing column indices of the active row k */
2969   nlnk = am + 1;
2970   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2971 
2972   /* initial FreeSpace size is fill*(ai[am]+1) */
2973   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2974   current_space = free_space;
2975 
2976   for (k = 0; k < am; k++) { /* for each active row k */
2977     /* initialize lnk by the column indices of row rip[k] of A */
2978     nzk   = 0;
2979     ncols = ai[rip[k] + 1] - ai[rip[k]];
2980     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2981     ncols_upper = 0;
2982     for (j = 0; j < ncols; j++) {
2983       i = riip[*(aj + ai[rip[k]] + j)];
2984       if (i >= k) { /* only take upper triangular entry */
2985         cols[ncols_upper] = i;
2986         ncols_upper++;
2987       }
2988     }
2989     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2990     nzk += nlnk;
2991 
2992     /* update lnk by computing fill-in for each pivot row to be merged in */
2993     prow = jl[k]; /* 1st pivot row */
2994 
2995     while (prow < k) {
2996       nextprow = jl[prow];
2997       /* merge prow into k-th row */
2998       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2999       jmax   = ui[prow + 1];
3000       ncols  = jmax - jmin;
3001       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3002       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3003       nzk += nlnk;
3004 
3005       /* update il and jl for prow */
3006       if (jmin < jmax) {
3007         il[prow] = jmin;
3008         j        = *uj_ptr;
3009         jl[prow] = jl[j];
3010         jl[j]    = prow;
3011       }
3012       prow = nextprow;
3013     }
3014 
3015     /* if free space is not available, make more free space */
3016     if (current_space->local_remaining < nzk) {
3017       i = am - k + 1;                     /* num of unfactored rows */
3018       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3019       PetscCall(PetscFreeSpaceGet(i, &current_space));
3020       reallocs++;
3021     }
3022 
3023     /* copy data into free space, then initialize lnk */
3024     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3025 
3026     /* add the k-th row into il and jl */
3027     if (nzk - 1 > 0) {
3028       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3029       jl[k] = jl[i];
3030       jl[i] = k;
3031       il[k] = ui[k] + 1;
3032     }
3033     ui_ptr[k] = current_space->array;
3034 
3035     current_space->array += nzk;
3036     current_space->local_used += nzk;
3037     current_space->local_remaining -= nzk;
3038 
3039     ui[k + 1] = ui[k] + nzk;
3040   }
3041 
3042   #if defined(PETSC_USE_INFO)
3043   if (ai[am] != 0) {
3044     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3045     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3046     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3047     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3048   } else {
3049     PetscCall(PetscInfo(A, "Empty matrix\n"));
3050   }
3051   #endif
3052 
3053   PetscCall(ISRestoreIndices(perm, &rip));
3054   PetscCall(ISRestoreIndices(iperm, &riip));
3055   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3056 
3057   /* destroy list of free space and other temporary array(s) */
3058   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3059   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3060   PetscCall(PetscLLDestroy(lnk, lnkbt));
3061 
3062   /* put together the new matrix in MATSEQSBAIJ format */
3063 
3064   b               = (Mat_SeqSBAIJ *)fact->data;
3065   b->singlemalloc = PETSC_FALSE;
3066   b->free_a       = PETSC_TRUE;
3067   b->free_ij      = PETSC_TRUE;
3068 
3069   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3070 
3071   b->j    = uj;
3072   b->i    = ui;
3073   b->diag = NULL;
3074   b->ilen = NULL;
3075   b->imax = NULL;
3076   b->row  = perm;
3077   b->col  = perm;
3078 
3079   PetscCall(PetscObjectReference((PetscObject)perm));
3080   PetscCall(PetscObjectReference((PetscObject)perm));
3081 
3082   b->icol          = iperm;
3083   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3084 
3085   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3086   b->maxnz = b->nz = ui[am];
3087 
3088   fact->info.factor_mallocs   = reallocs;
3089   fact->info.fill_ratio_given = fill;
3090   if (ai[am] != 0) {
3091     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3092   } else {
3093     fact->info.fill_ratio_needed = 0.0;
3094   }
3095   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3096   PetscFunctionReturn(PETSC_SUCCESS);
3097 }
3098 #endif
3099 
3100 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3101 {
3102   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3103   PetscInt           n  = A->rmap->n;
3104   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3105   PetscScalar       *x, sum;
3106   const PetscScalar *b;
3107   const MatScalar   *aa = a->a, *v;
3108   PetscInt           i, nz;
3109 
3110   PetscFunctionBegin;
3111   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3112 
3113   PetscCall(VecGetArrayRead(bb, &b));
3114   PetscCall(VecGetArrayWrite(xx, &x));
3115 
3116   /* forward solve the lower triangular */
3117   x[0] = b[0];
3118   v    = aa;
3119   vi   = aj;
3120   for (i = 1; i < n; i++) {
3121     nz  = ai[i + 1] - ai[i];
3122     sum = b[i];
3123     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3124     v += nz;
3125     vi += nz;
3126     x[i] = sum;
3127   }
3128 
3129   /* backward solve the upper triangular */
3130   for (i = n - 1; i >= 0; i--) {
3131     v   = aa + adiag[i + 1] + 1;
3132     vi  = aj + adiag[i + 1] + 1;
3133     nz  = adiag[i] - adiag[i + 1] - 1;
3134     sum = x[i];
3135     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3136     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3137   }
3138 
3139   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3140   PetscCall(VecRestoreArrayRead(bb, &b));
3141   PetscCall(VecRestoreArrayWrite(xx, &x));
3142   PetscFunctionReturn(PETSC_SUCCESS);
3143 }
3144 
3145 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3146 {
3147   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3148   IS                 iscol = a->col, isrow = a->row;
3149   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3150   const PetscInt    *rout, *cout, *r, *c;
3151   PetscScalar       *x, *tmp, sum;
3152   const PetscScalar *b;
3153   const MatScalar   *aa = a->a, *v;
3154 
3155   PetscFunctionBegin;
3156   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3157 
3158   PetscCall(VecGetArrayRead(bb, &b));
3159   PetscCall(VecGetArrayWrite(xx, &x));
3160   tmp = a->solve_work;
3161 
3162   PetscCall(ISGetIndices(isrow, &rout));
3163   r = rout;
3164   PetscCall(ISGetIndices(iscol, &cout));
3165   c = cout;
3166 
3167   /* forward solve the lower triangular */
3168   tmp[0] = b[r[0]];
3169   v      = aa;
3170   vi     = aj;
3171   for (i = 1; i < n; i++) {
3172     nz  = ai[i + 1] - ai[i];
3173     sum = b[r[i]];
3174     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3175     tmp[i] = sum;
3176     v += nz;
3177     vi += nz;
3178   }
3179 
3180   /* backward solve the upper triangular */
3181   for (i = n - 1; i >= 0; i--) {
3182     v   = aa + adiag[i + 1] + 1;
3183     vi  = aj + adiag[i + 1] + 1;
3184     nz  = adiag[i] - adiag[i + 1] - 1;
3185     sum = tmp[i];
3186     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3187     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3188   }
3189 
3190   PetscCall(ISRestoreIndices(isrow, &rout));
3191   PetscCall(ISRestoreIndices(iscol, &cout));
3192   PetscCall(VecRestoreArrayRead(bb, &b));
3193   PetscCall(VecRestoreArrayWrite(xx, &x));
3194   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3195   PetscFunctionReturn(PETSC_SUCCESS);
3196 }
3197 
3198 #if 0
3199 // unused
3200 /*
3201     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3202 */
3203 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3204 {
3205   Mat             B = *fact;
3206   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3207   IS              isicol;
3208   const PetscInt *r, *ic;
3209   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3210   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3211   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3212   PetscInt        nlnk, *lnk;
3213   PetscBT         lnkbt;
3214   PetscBool       row_identity, icol_identity;
3215   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3216   const PetscInt *ics;
3217   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3218   PetscReal       dt = info->dt, shift = info->shiftamount;
3219   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3220   PetscBool       missing;
3221 
3222   PetscFunctionBegin;
3223   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3224   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3225 
3226   /* symbolic factorization, can be reused */
3227   PetscCall(MatMissingDiagonal(A, &missing, &i));
3228   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3229   adiag = a->diag;
3230 
3231   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3232 
3233   /* bdiag is location of diagonal in factor */
3234   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3235   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3236 
3237   /* allocate row pointers bi */
3238   PetscCall(PetscMalloc1(2 * n + 2, &bi));
3239 
3240   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3241   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3242   nnz_max = ai[n] + 2 * n * dtcount + 2;
3243 
3244   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3245   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3246 
3247   /* put together the new matrix */
3248   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3249   b = (Mat_SeqAIJ *)B->data;
3250 
3251   b->free_a       = PETSC_TRUE;
3252   b->free_ij      = PETSC_TRUE;
3253   b->singlemalloc = PETSC_FALSE;
3254 
3255   b->a    = ba;
3256   b->j    = bj;
3257   b->i    = bi;
3258   b->diag = bdiag;
3259   b->ilen = NULL;
3260   b->imax = NULL;
3261   b->row  = isrow;
3262   b->col  = iscol;
3263   PetscCall(PetscObjectReference((PetscObject)isrow));
3264   PetscCall(PetscObjectReference((PetscObject)iscol));
3265   b->icol = isicol;
3266 
3267   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3268   b->maxnz = nnz_max;
3269 
3270   B->factortype            = MAT_FACTOR_ILUDT;
3271   B->info.factor_mallocs   = 0;
3272   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3273   /*  end of symbolic factorization */
3274 
3275   PetscCall(ISGetIndices(isrow, &r));
3276   PetscCall(ISGetIndices(isicol, &ic));
3277   ics = ic;
3278 
3279   /* linked list for storing column indices of the active row */
3280   nlnk = n + 1;
3281   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3282 
3283   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3284   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3285   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3286   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3287   PetscCall(PetscArrayzero(rtmp, n));
3288 
3289   bi[0]         = 0;
3290   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3291   bdiag_rev[n]  = bdiag[0];
3292   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3293   for (i = 0; i < n; i++) {
3294     /* copy initial fill into linked list */
3295     nzi = ai[r[i] + 1] - ai[r[i]];
3296     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3297     nzi_al = adiag[r[i]] - ai[r[i]];
3298     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3299     ajtmp  = aj + ai[r[i]];
3300     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3301 
3302     /* load in initial (unfactored row) */
3303     aatmp = a->a + ai[r[i]];
3304     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3305 
3306     /* add pivot rows into linked list */
3307     row = lnk[n];
3308     while (row < i) {
3309       nzi_bl = bi[row + 1] - bi[row] + 1;
3310       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3311       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3312       nzi += nlnk;
3313       row = lnk[row];
3314     }
3315 
3316     /* copy data from lnk into jtmp, then initialize lnk */
3317     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3318 
3319     /* numerical factorization */
3320     bjtmp = jtmp;
3321     row   = *bjtmp++; /* 1st pivot row */
3322     while (row < i) {
3323       pc         = rtmp + row;
3324       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3325       multiplier = (*pc) * (*pv);
3326       *pc        = multiplier;
3327       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3328         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3329         pv = ba + bdiag[row + 1] + 1;
3330         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3331         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3332         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3333       }
3334       row = *bjtmp++;
3335     }
3336 
3337     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3338     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3339     nzi_bl   = 0;
3340     j        = 0;
3341     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3342       vtmp[j]       = rtmp[jtmp[j]];
3343       rtmp[jtmp[j]] = 0.0;
3344       nzi_bl++;
3345       j++;
3346     }
3347     nzi_bu = nzi - nzi_bl - 1;
3348     while (j < nzi) {
3349       vtmp[j]       = rtmp[jtmp[j]];
3350       rtmp[jtmp[j]] = 0.0;
3351       j++;
3352     }
3353 
3354     bjtmp = bj + bi[i];
3355     batmp = ba + bi[i];
3356     /* apply level dropping rule to L part */
3357     ncut = nzi_al + dtcount;
3358     if (ncut < nzi_bl) {
3359       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3360       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3361     } else {
3362       ncut = nzi_bl;
3363     }
3364     for (j = 0; j < ncut; j++) {
3365       bjtmp[j] = jtmp[j];
3366       batmp[j] = vtmp[j];
3367     }
3368     bi[i + 1] = bi[i] + ncut;
3369     nzi       = ncut + 1;
3370 
3371     /* apply level dropping rule to U part */
3372     ncut = nzi_au + dtcount;
3373     if (ncut < nzi_bu) {
3374       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3375       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3376     } else {
3377       ncut = nzi_bu;
3378     }
3379     nzi += ncut;
3380 
3381     /* mark bdiagonal */
3382     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3383     bdiag_rev[n - i - 1] = bdiag[i + 1];
3384     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3385     bjtmp                = bj + bdiag[i];
3386     batmp                = ba + bdiag[i];
3387     *bjtmp               = i;
3388     *batmp               = diag_tmp; /* rtmp[i]; */
3389     if (*batmp == 0.0) *batmp = dt + shift;
3390     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3391 
3392     bjtmp = bj + bdiag[i + 1] + 1;
3393     batmp = ba + bdiag[i + 1] + 1;
3394     for (k = 0; k < ncut; k++) {
3395       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3396       batmp[k] = vtmp[nzi_bl + 1 + k];
3397     }
3398 
3399     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3400   }              /* for (i=0; i<n; i++) */
3401   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3402 
3403   PetscCall(ISRestoreIndices(isrow, &r));
3404   PetscCall(ISRestoreIndices(isicol, &ic));
3405 
3406   PetscCall(PetscLLDestroy(lnk, lnkbt));
3407   PetscCall(PetscFree2(im, jtmp));
3408   PetscCall(PetscFree2(rtmp, vtmp));
3409   PetscCall(PetscFree(bdiag_rev));
3410 
3411   PetscCall(PetscLogFlops(B->cmap->n));
3412   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3413 
3414   PetscCall(ISIdentity(isrow, &row_identity));
3415   PetscCall(ISIdentity(isicol, &icol_identity));
3416   if (row_identity && icol_identity) {
3417     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3418   } else {
3419     B->ops->solve = MatSolve_SeqAIJ;
3420   }
3421 
3422   B->ops->solveadd          = NULL;
3423   B->ops->solvetranspose    = NULL;
3424   B->ops->solvetransposeadd = NULL;
3425   B->ops->matsolve          = NULL;
3426   B->assembled              = PETSC_TRUE;
3427   B->preallocated           = PETSC_TRUE;
3428   PetscFunctionReturn(PETSC_SUCCESS);
3429 }
3430 
3431 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3432 /*
3433     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3434 */
3435 
3436 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3437 {
3438   PetscFunctionBegin;
3439   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3440   PetscFunctionReturn(PETSC_SUCCESS);
3441 }
3442 
3443 /*
3444    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3445    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3446 */
3447 /*
3448     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3449 */
3450 
3451 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3452 {
3453   Mat             C = fact;
3454   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3455   IS              isrow = b->row, isicol = b->icol;
3456   const PetscInt *r, *ic, *ics;
3457   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3458   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3459   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3460   PetscReal       dt = info->dt, shift = info->shiftamount;
3461   PetscBool       row_identity, col_identity;
3462 
3463   PetscFunctionBegin;
3464   PetscCall(ISGetIndices(isrow, &r));
3465   PetscCall(ISGetIndices(isicol, &ic));
3466   PetscCall(PetscMalloc1(n + 1, &rtmp));
3467   ics = ic;
3468 
3469   for (i = 0; i < n; i++) {
3470     /* initialize rtmp array */
3471     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3472     bjtmp = bj + bi[i];
3473     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3474     rtmp[i] = 0.0;
3475     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3476     bjtmp   = bj + bdiag[i + 1] + 1;
3477     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3478 
3479     /* load in initial unfactored row of A */
3480     nz    = ai[r[i] + 1] - ai[r[i]];
3481     ajtmp = aj + ai[r[i]];
3482     v     = aa + ai[r[i]];
3483     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3484 
3485     /* numerical factorization */
3486     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3487     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3488     k     = 0;
3489     while (k < nzl) {
3490       row        = *bjtmp++;
3491       pc         = rtmp + row;
3492       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3493       multiplier = (*pc) * (*pv);
3494       *pc        = multiplier;
3495       if (PetscAbsScalar(multiplier) > dt) {
3496         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3497         pv = b->a + bdiag[row + 1] + 1;
3498         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3499         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3500         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3501       }
3502       k++;
3503     }
3504 
3505     /* finished row so stick it into b->a */
3506     /* L-part */
3507     pv  = b->a + bi[i];
3508     pj  = bj + bi[i];
3509     nzl = bi[i + 1] - bi[i];
3510     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3511 
3512     /* diagonal: invert diagonal entries for simpler triangular solves */
3513     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3514     b->a[bdiag[i]] = 1.0 / rtmp[i];
3515 
3516     /* U-part */
3517     pv  = b->a + bdiag[i + 1] + 1;
3518     pj  = bj + bdiag[i + 1] + 1;
3519     nzu = bdiag[i] - bdiag[i + 1] - 1;
3520     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3521   }
3522 
3523   PetscCall(PetscFree(rtmp));
3524   PetscCall(ISRestoreIndices(isicol, &ic));
3525   PetscCall(ISRestoreIndices(isrow, &r));
3526 
3527   PetscCall(ISIdentity(isrow, &row_identity));
3528   PetscCall(ISIdentity(isicol, &col_identity));
3529   if (row_identity && col_identity) {
3530     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3531   } else {
3532     C->ops->solve = MatSolve_SeqAIJ;
3533   }
3534   C->ops->solveadd          = NULL;
3535   C->ops->solvetranspose    = NULL;
3536   C->ops->solvetransposeadd = NULL;
3537   C->ops->matsolve          = NULL;
3538   C->assembled              = PETSC_TRUE;
3539   C->preallocated           = PETSC_TRUE;
3540 
3541   PetscCall(PetscLogFlops(C->cmap->n));
3542   PetscFunctionReturn(PETSC_SUCCESS);
3543 }
3544 #endif
3545