xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
6 /*
7       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8 
9       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10 */
11 #if 0
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal          best, past = 0, future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i = 0; i < n; i++) {
25     future = 0.0;
26     for (j = ai[i]; j < ai[i + 1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else past = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past / future > best) {
32       best    = past / future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n, &done));
38   PetscCall(PetscArrayzero(done, n));
39   PetscCall(PetscMalloc1(n, &order));
40   order[0] = current;
41   for (i = 0; i < n - 1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j = ai[current]; j < ai[current + 1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k = ai[jj]; k < ai[jj + 1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past / future > best) {
57         best       = past / future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k = 0; k < n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j = ai[k]; j < ai[k + 1]; j++) {
68           kk = aj[j];
69           if (done[kk]) past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past / future > best) {
74           best       = past / future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80     current      = newcurrent;
81     order[i + 1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #endif
91 
92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93 {
94   PetscFunctionBegin;
95   *type = MATSOLVERPETSC;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100 {
101   PetscInt n = A->rmap->n;
102 
103   PetscFunctionBegin;
104 #if defined(PETSC_USE_COMPLEX)
105   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107     *B = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110 #endif
111 
112   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113   PetscCall(MatSetSizes(*B, n, n, n, n));
114   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115     PetscCall(MatSetType(*B, MATSEQAIJ));
116 
117     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
119 
120     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
127 
128     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133   (*B)->factortype = ftype;
134 
135   PetscCall(PetscFree((*B)->solvertype));
136   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137   (*B)->canuseordering = PETSC_TRUE;
138   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 #if 0
143 // currently unused
144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145 {
146   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147   IS                 isicol;
148   const PetscInt    *r, *ic;
149   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150   PetscInt          *bi, *bj, *ajtmp;
151   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152   PetscReal          f;
153   PetscInt           nlnk, *lnk, k, **bi_ptr;
154   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155   PetscBT            lnkbt;
156   PetscBool          missing;
157 
158   PetscFunctionBegin;
159   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160   PetscCall(MatMissingDiagonal(A, &missing, &i));
161   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
162 
163   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164   PetscCall(ISGetIndices(isrow, &r));
165   PetscCall(ISGetIndices(isicol, &ic));
166 
167   /* get new row pointers */
168   PetscCall(PetscMalloc1(n + 1, &bi));
169   bi[0] = 0;
170 
171   /* bdiag is location of diagonal in factor */
172   PetscCall(PetscMalloc1(n + 1, &bdiag));
173   bdiag[0] = 0;
174 
175   /* linked list for storing column indices of the active row */
176   nlnk = n + 1;
177   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178 
179   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
180 
181   /* initial FreeSpace size is f*(ai[n]+1) */
182   f = info->fill;
183   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
184   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
185   current_space = free_space;
186 
187   for (i = 0; i < n; i++) {
188     /* copy previous fill into linked list */
189     nzi   = 0;
190     nnz   = ai[r[i] + 1] - ai[r[i]];
191     ajtmp = aj + ai[r[i]];
192     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
193     nzi += nlnk;
194 
195     /* add pivot rows into linked list */
196     row = lnk[n];
197     while (row < i) {
198       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
199       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
200       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
201       nzi += nlnk;
202       row = lnk[row];
203     }
204     bi[i + 1] = bi[i] + nzi;
205     im[i]     = nzi;
206 
207     /* mark bdiag */
208     nzbd = 0;
209     nnz  = nzi;
210     k    = lnk[n];
211     while (nnz-- && k < i) {
212       nzbd++;
213       k = lnk[k];
214     }
215     bdiag[i] = bi[i] + nzbd;
216 
217     /* if free space is not available, make more free space */
218     if (current_space->local_remaining < nzi) {
219       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
220       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
221       reallocs++;
222     }
223 
224     /* copy data into free space, then initialize lnk */
225     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
226 
227     bi_ptr[i] = current_space->array;
228     current_space->array += nzi;
229     current_space->local_used += nzi;
230     current_space->local_remaining -= nzi;
231   }
232   #if defined(PETSC_USE_INFO)
233   if (ai[n] != 0) {
234     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
235     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
236     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
237     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
238     PetscCall(PetscInfo(A, "for best performance.\n"));
239   } else {
240     PetscCall(PetscInfo(A, "Empty matrix\n"));
241   }
242   #endif
243 
244   PetscCall(ISRestoreIndices(isrow, &r));
245   PetscCall(ISRestoreIndices(isicol, &ic));
246 
247   /* destroy list of free space and other temporary array(s) */
248   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
249   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
250   PetscCall(PetscLLDestroy(lnk, lnkbt));
251   PetscCall(PetscFree2(bi_ptr, im));
252 
253   /* put together the new matrix */
254   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
255   b = (Mat_SeqAIJ *)(B)->data;
256 
257   b->free_a       = PETSC_TRUE;
258   b->free_ij      = PETSC_TRUE;
259   b->singlemalloc = PETSC_FALSE;
260 
261   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
262   b->j    = bj;
263   b->i    = bi;
264   b->diag = bdiag;
265   b->ilen = NULL;
266   b->imax = NULL;
267   b->row  = isrow;
268   b->col  = iscol;
269   PetscCall(PetscObjectReference((PetscObject)isrow));
270   PetscCall(PetscObjectReference((PetscObject)iscol));
271   b->icol = isicol;
272   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
273 
274   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
275   b->maxnz = b->nz = bi[n];
276 
277   (B)->factortype            = MAT_FACTOR_LU;
278   (B)->info.factor_mallocs   = reallocs;
279   (B)->info.fill_ratio_given = f;
280 
281   if (ai[n]) {
282     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
283   } else {
284     (B)->info.fill_ratio_needed = 0.0;
285   }
286   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
287   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
288   PetscFunctionReturn(PETSC_SUCCESS);
289 }
290 #endif
291 
292 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
293 {
294   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
295   IS                 isicol;
296   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
297   PetscInt           i, n = A->rmap->n;
298   PetscInt          *bi, *bj;
299   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
300   PetscReal          f;
301   PetscInt           nlnk, *lnk, k, **bi_ptr;
302   PetscFreeSpaceList free_space = NULL, current_space = NULL;
303   PetscBT            lnkbt;
304   PetscBool          missing;
305 
306   PetscFunctionBegin;
307   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
308   PetscCall(MatMissingDiagonal(A, &missing, &i));
309   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
310 
311   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
312   PetscCall(ISGetIndices(isrow, &r));
313   PetscCall(ISGetIndices(isicol, &ic));
314 
315   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316   PetscCall(PetscMalloc1(n + 1, &bi));
317   PetscCall(PetscMalloc1(n + 1, &bdiag));
318   bi[0] = bdiag[0] = 0;
319 
320   /* linked list for storing column indices of the active row */
321   nlnk = n + 1;
322   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
323 
324   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
325 
326   /* initial FreeSpace size is f*(ai[n]+1) */
327   f = info->fill;
328   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
329   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
330   current_space = free_space;
331 
332   for (i = 0; i < n; i++) {
333     /* copy previous fill into linked list */
334     nzi   = 0;
335     nnz   = ai[r[i] + 1] - ai[r[i]];
336     ajtmp = aj + ai[r[i]];
337     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
338     nzi += nlnk;
339 
340     /* add pivot rows into linked list */
341     row = lnk[n];
342     while (row < i) {
343       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
344       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
346       nzi += nlnk;
347       row = lnk[row];
348     }
349     bi[i + 1] = bi[i] + nzi;
350     im[i]     = nzi;
351 
352     /* mark bdiag */
353     nzbd = 0;
354     nnz  = nzi;
355     k    = lnk[n];
356     while (nnz-- && k < i) {
357       nzbd++;
358       k = lnk[k];
359     }
360     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
361 
362     /* if free space is not available, make more free space */
363     if (current_space->local_remaining < nzi) {
364       /* estimated additional space needed */
365       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
366       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
367       reallocs++;
368     }
369 
370     /* copy data into free space, then initialize lnk */
371     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
372 
373     bi_ptr[i] = current_space->array;
374     current_space->array += nzi;
375     current_space->local_used += nzi;
376     current_space->local_remaining -= nzi;
377   }
378 
379   PetscCall(ISRestoreIndices(isrow, &r));
380   PetscCall(ISRestoreIndices(isicol, &ic));
381 
382   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
383   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
384   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
385   PetscCall(PetscLLDestroy(lnk, lnkbt));
386   PetscCall(PetscFree2(bi_ptr, im));
387 
388   /* put together the new matrix */
389   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
390   b = (Mat_SeqAIJ *)(B)->data;
391 
392   b->free_a       = PETSC_TRUE;
393   b->free_ij      = PETSC_TRUE;
394   b->singlemalloc = PETSC_FALSE;
395 
396   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
397 
398   b->j    = bj;
399   b->i    = bi;
400   b->diag = bdiag;
401   b->ilen = NULL;
402   b->imax = NULL;
403   b->row  = isrow;
404   b->col  = iscol;
405   PetscCall(PetscObjectReference((PetscObject)isrow));
406   PetscCall(PetscObjectReference((PetscObject)iscol));
407   b->icol = isicol;
408   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
409 
410   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
411   b->maxnz = b->nz = bdiag[0] + 1;
412 
413   B->factortype            = MAT_FACTOR_LU;
414   B->info.factor_mallocs   = reallocs;
415   B->info.fill_ratio_given = f;
416 
417   if (ai[n]) {
418     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
419   } else {
420     B->info.fill_ratio_needed = 0.0;
421   }
422 #if defined(PETSC_USE_INFO)
423   if (ai[n] != 0) {
424     PetscReal af = B->info.fill_ratio_needed;
425     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
426     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
427     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
428     PetscCall(PetscInfo(A, "for best performance.\n"));
429   } else {
430     PetscCall(PetscInfo(A, "Empty matrix\n"));
431   }
432 #endif
433   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
434   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
435   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
436   PetscFunctionReturn(PETSC_SUCCESS);
437 }
438 
439 /*
440     Trouble in factorization, should we dump the original matrix?
441 */
442 PetscErrorCode MatFactorDumpMatrix(Mat A)
443 {
444   PetscBool flg = PETSC_FALSE;
445 
446   PetscFunctionBegin;
447   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
448   if (flg) {
449     PetscViewer viewer;
450     char        filename[PETSC_MAX_PATH_LEN];
451 
452     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
453     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
454     PetscCall(MatView(A, viewer));
455     PetscCall(PetscViewerDestroy(&viewer));
456   }
457   PetscFunctionReturn(PETSC_SUCCESS);
458 }
459 
460 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
461 {
462   Mat              C = B;
463   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
464   IS               isrow = b->row, isicol = b->icol;
465   const PetscInt  *r, *ic, *ics;
466   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
467   PetscInt         i, j, k, nz, nzL, row, *pj;
468   const PetscInt  *ajtmp, *bjtmp;
469   MatScalar       *rtmp, *pc, multiplier, *pv;
470   const MatScalar *aa = a->a, *v;
471   PetscBool        row_identity, col_identity;
472   FactorShiftCtx   sctx;
473   const PetscInt  *ddiag;
474   PetscReal        rs;
475   MatScalar        d;
476 
477   PetscFunctionBegin;
478   /* MatPivotSetUp(): initialize shift context sctx */
479   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
480 
481   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
482     ddiag          = a->diag;
483     sctx.shift_top = info->zeropivot;
484     for (i = 0; i < n; i++) {
485       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
486       d  = (aa)[ddiag[i]];
487       rs = -PetscAbsScalar(d) - PetscRealPart(d);
488       v  = aa + ai[i];
489       nz = ai[i + 1] - ai[i];
490       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
491       if (rs > sctx.shift_top) sctx.shift_top = rs;
492     }
493     sctx.shift_top *= 1.1;
494     sctx.nshift_max = 5;
495     sctx.shift_lo   = 0.;
496     sctx.shift_hi   = 1.;
497   }
498 
499   PetscCall(ISGetIndices(isrow, &r));
500   PetscCall(ISGetIndices(isicol, &ic));
501   PetscCall(PetscMalloc1(n + 1, &rtmp));
502   ics = ic;
503 
504   do {
505     sctx.newshift = PETSC_FALSE;
506     for (i = 0; i < n; i++) {
507       /* zero rtmp */
508       /* L part */
509       nz    = bi[i + 1] - bi[i];
510       bjtmp = bj + bi[i];
511       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* U part */
514       nz    = bdiag[i] - bdiag[i + 1];
515       bjtmp = bj + bdiag[i + 1] + 1;
516       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
517 
518       /* load in initial (unfactored row) */
519       nz    = ai[r[i] + 1] - ai[r[i]];
520       ajtmp = aj + ai[r[i]];
521       v     = aa + ai[r[i]];
522       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
523       /* ZeropivotApply() */
524       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
525 
526       /* elimination */
527       bjtmp = bj + bi[i];
528       row   = *bjtmp++;
529       nzL   = bi[i + 1] - bi[i];
530       for (k = 0; k < nzL; k++) {
531         pc = rtmp + row;
532         if (*pc != 0.0) {
533           pv         = b->a + bdiag[row];
534           multiplier = *pc * (*pv);
535           *pc        = multiplier;
536 
537           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
538           pv = b->a + bdiag[row + 1] + 1;
539           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
540 
541           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542           PetscCall(PetscLogFlops(1 + 2.0 * nz));
543         }
544         row = *bjtmp++;
545       }
546 
547       /* finished row so stick it into b->a */
548       rs = 0.0;
549       /* L part */
550       pv = b->a + bi[i];
551       pj = b->j + bi[i];
552       nz = bi[i + 1] - bi[i];
553       for (j = 0; j < nz; j++) {
554         pv[j] = rtmp[pj[j]];
555         rs += PetscAbsScalar(pv[j]);
556       }
557 
558       /* U part */
559       pv = b->a + bdiag[i + 1] + 1;
560       pj = b->j + bdiag[i + 1] + 1;
561       nz = bdiag[i] - bdiag[i + 1] - 1;
562       for (j = 0; j < nz; j++) {
563         pv[j] = rtmp[pj[j]];
564         rs += PetscAbsScalar(pv[j]);
565       }
566 
567       sctx.rs = rs;
568       sctx.pv = rtmp[i];
569       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
570       if (sctx.newshift) break; /* break for-loop */
571       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
572 
573       /* Mark diagonal and invert diagonal for simpler triangular solves */
574       pv  = b->a + bdiag[i];
575       *pv = 1.0 / rtmp[i];
576 
577     } /* endof for (i=0; i<n; i++) { */
578 
579     /* MatPivotRefine() */
580     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
581       /*
582        * if no shift in this attempt & shifting & started shifting & can refine,
583        * then try lower shift
584        */
585       sctx.shift_hi       = sctx.shift_fraction;
586       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
587       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
588       sctx.newshift       = PETSC_TRUE;
589       sctx.nshift++;
590     }
591   } while (sctx.newshift);
592 
593   PetscCall(PetscFree(rtmp));
594   PetscCall(ISRestoreIndices(isicol, &ic));
595   PetscCall(ISRestoreIndices(isrow, &r));
596 
597   PetscCall(ISIdentity(isrow, &row_identity));
598   PetscCall(ISIdentity(isicol, &col_identity));
599   if (b->inode.size) {
600     C->ops->solve = MatSolve_SeqAIJ_Inode;
601   } else if (row_identity && col_identity) {
602     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603   } else {
604     C->ops->solve = MatSolve_SeqAIJ;
605   }
606   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
607   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
608   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
609   C->ops->matsolve          = MatMatSolve_SeqAIJ;
610   C->assembled              = PETSC_TRUE;
611   C->preallocated           = PETSC_TRUE;
612 
613   PetscCall(PetscLogFlops(C->cmap->n));
614 
615   /* MatShiftView(A,info,&sctx) */
616   if (sctx.nshift) {
617     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
618       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
619     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
620       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
621     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
622       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
623     }
624   }
625   PetscFunctionReturn(PETSC_SUCCESS);
626 }
627 
628 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
629 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
630 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
631 
632 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
633 {
634   Mat              C = B;
635   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
636   IS               isrow = b->row, isicol = b->icol;
637   const PetscInt  *r, *ic, *ics;
638   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
639   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
640   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
641   MatScalar       *pv, *rtmp, *pc, multiplier, d;
642   const MatScalar *v, *aa = a->a;
643   PetscReal        rs = 0.0;
644   FactorShiftCtx   sctx;
645   const PetscInt  *ddiag;
646   PetscBool        row_identity, col_identity;
647 
648   PetscFunctionBegin;
649   /* MatPivotSetUp(): initialize shift context sctx */
650   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
651 
652   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
653     ddiag          = a->diag;
654     sctx.shift_top = info->zeropivot;
655     for (i = 0; i < n; i++) {
656       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
657       d  = (aa)[ddiag[i]];
658       rs = -PetscAbsScalar(d) - PetscRealPart(d);
659       v  = aa + ai[i];
660       nz = ai[i + 1] - ai[i];
661       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
662       if (rs > sctx.shift_top) sctx.shift_top = rs;
663     }
664     sctx.shift_top *= 1.1;
665     sctx.nshift_max = 5;
666     sctx.shift_lo   = 0.;
667     sctx.shift_hi   = 1.;
668   }
669 
670   PetscCall(ISGetIndices(isrow, &r));
671   PetscCall(ISGetIndices(isicol, &ic));
672   PetscCall(PetscMalloc1(n + 1, &rtmp));
673   ics = ic;
674 
675   do {
676     sctx.newshift = PETSC_FALSE;
677     for (i = 0; i < n; i++) {
678       nz    = bi[i + 1] - bi[i];
679       bjtmp = bj + bi[i];
680       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
681 
682       /* load in initial (unfactored row) */
683       nz    = ai[r[i] + 1] - ai[r[i]];
684       ajtmp = aj + ai[r[i]];
685       v     = aa + ai[r[i]];
686       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
687       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
688 
689       row = *bjtmp++;
690       while (row < i) {
691         pc = rtmp + row;
692         if (*pc != 0.0) {
693           pv         = b->a + diag_offset[row];
694           pj         = b->j + diag_offset[row] + 1;
695           multiplier = *pc / *pv++;
696           *pc        = multiplier;
697           nz         = bi[row + 1] - diag_offset[row] - 1;
698           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
699           PetscCall(PetscLogFlops(1 + 2.0 * nz));
700         }
701         row = *bjtmp++;
702       }
703       /* finished row so stick it into b->a */
704       pv   = b->a + bi[i];
705       pj   = b->j + bi[i];
706       nz   = bi[i + 1] - bi[i];
707       diag = diag_offset[i] - bi[i];
708       rs   = 0.0;
709       for (j = 0; j < nz; j++) {
710         pv[j] = rtmp[pj[j]];
711         rs += PetscAbsScalar(pv[j]);
712       }
713       rs -= PetscAbsScalar(pv[diag]);
714 
715       sctx.rs = rs;
716       sctx.pv = pv[diag];
717       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
718       if (sctx.newshift) break;
719       pv[diag] = sctx.pv;
720     }
721 
722     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
723       /*
724        * if no shift in this attempt & shifting & started shifting & can refine,
725        * then try lower shift
726        */
727       sctx.shift_hi       = sctx.shift_fraction;
728       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
729       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
730       sctx.newshift       = PETSC_TRUE;
731       sctx.nshift++;
732     }
733   } while (sctx.newshift);
734 
735   /* invert diagonal entries for simpler triangular solves */
736   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
737   PetscCall(PetscFree(rtmp));
738   PetscCall(ISRestoreIndices(isicol, &ic));
739   PetscCall(ISRestoreIndices(isrow, &r));
740 
741   PetscCall(ISIdentity(isrow, &row_identity));
742   PetscCall(ISIdentity(isicol, &col_identity));
743   if (row_identity && col_identity) {
744     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745   } else {
746     C->ops->solve = MatSolve_SeqAIJ_inplace;
747   }
748   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
749   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
750   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
751   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
752 
753   C->assembled    = PETSC_TRUE;
754   C->preallocated = PETSC_TRUE;
755 
756   PetscCall(PetscLogFlops(C->cmap->n));
757   if (sctx.nshift) {
758     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
759       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
760     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
761       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
762     }
763   }
764   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
765   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
766 
767   PetscCall(MatSeqAIJCheckInode(C));
768   PetscFunctionReturn(PETSC_SUCCESS);
769 }
770 
771 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
772 
773 /*
774    This routine implements inplace ILU(0) with row or/and column permutations.
775    Input:
776      A - original matrix
777    Output;
778      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
779          a->j (col index) is permuted by the inverse of colperm, then sorted
780          a->a reordered accordingly with a->j
781          a->diag (ptr to diagonal elements) is updated.
782 */
783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
784 {
785   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
786   IS               isrow = a->row, isicol = a->icol;
787   const PetscInt  *r, *ic, *ics;
788   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
789   PetscInt        *ajtmp, nz, row;
790   PetscInt        *diag = a->diag, nbdiag, *pj;
791   PetscScalar     *rtmp, *pc, multiplier, d;
792   MatScalar       *pv, *v;
793   PetscReal        rs;
794   FactorShiftCtx   sctx;
795   const MatScalar *aa = a->a, *vtmp;
796 
797   PetscFunctionBegin;
798   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
799 
800   /* MatPivotSetUp(): initialize shift context sctx */
801   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
802 
803   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
804     const PetscInt *ddiag = a->diag;
805     sctx.shift_top        = info->zeropivot;
806     for (i = 0; i < n; i++) {
807       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
808       d    = (aa)[ddiag[i]];
809       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
810       vtmp = aa + ai[i];
811       nz   = ai[i + 1] - ai[i];
812       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
813       if (rs > sctx.shift_top) sctx.shift_top = rs;
814     }
815     sctx.shift_top *= 1.1;
816     sctx.nshift_max = 5;
817     sctx.shift_lo   = 0.;
818     sctx.shift_hi   = 1.;
819   }
820 
821   PetscCall(ISGetIndices(isrow, &r));
822   PetscCall(ISGetIndices(isicol, &ic));
823   PetscCall(PetscMalloc1(n + 1, &rtmp));
824   PetscCall(PetscArrayzero(rtmp, n + 1));
825   ics = ic;
826 
827 #if defined(MV)
828   sctx.shift_top      = 0.;
829   sctx.nshift_max     = 0;
830   sctx.shift_lo       = 0.;
831   sctx.shift_hi       = 0.;
832   sctx.shift_fraction = 0.;
833 
834   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
835     sctx.shift_top = 0.;
836     for (i = 0; i < n; i++) {
837       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
838       d  = (a->a)[diag[i]];
839       rs = -PetscAbsScalar(d) - PetscRealPart(d);
840       v  = a->a + ai[i];
841       nz = ai[i + 1] - ai[i];
842       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
843       if (rs > sctx.shift_top) sctx.shift_top = rs;
844     }
845     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846     sctx.shift_top *= 1.1;
847     sctx.nshift_max = 5;
848     sctx.shift_lo   = 0.;
849     sctx.shift_hi   = 1.;
850   }
851 
852   sctx.shift_amount = 0.;
853   sctx.nshift       = 0;
854 #endif
855 
856   do {
857     sctx.newshift = PETSC_FALSE;
858     for (i = 0; i < n; i++) {
859       /* load in initial unfactored row */
860       nz    = ai[r[i] + 1] - ai[r[i]];
861       ajtmp = aj + ai[r[i]];
862       v     = a->a + ai[r[i]];
863       /* sort permuted ajtmp and values v accordingly */
864       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
865       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
866 
867       diag[r[i]] = ai[r[i]];
868       for (j = 0; j < nz; j++) {
869         rtmp[ajtmp[j]] = v[j];
870         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871       }
872       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
873 
874       row = *ajtmp++;
875       while (row < i) {
876         pc = rtmp + row;
877         if (*pc != 0.0) {
878           pv = a->a + diag[r[row]];
879           pj = aj + diag[r[row]] + 1;
880 
881           multiplier = *pc / *pv++;
882           *pc        = multiplier;
883           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
884           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885           PetscCall(PetscLogFlops(1 + 2.0 * nz));
886         }
887         row = *ajtmp++;
888       }
889       /* finished row so overwrite it onto a->a */
890       pv     = a->a + ai[r[i]];
891       pj     = aj + ai[r[i]];
892       nz     = ai[r[i] + 1] - ai[r[i]];
893       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
894 
895       rs = 0.0;
896       for (j = 0; j < nz; j++) {
897         pv[j] = rtmp[pj[j]];
898         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899       }
900 
901       sctx.rs = rs;
902       sctx.pv = pv[nbdiag];
903       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
904       if (sctx.newshift) break;
905       pv[nbdiag] = sctx.pv;
906     }
907 
908     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
909       /*
910        * if no shift in this attempt & shifting & started shifting & can refine,
911        * then try lower shift
912        */
913       sctx.shift_hi       = sctx.shift_fraction;
914       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
915       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
916       sctx.newshift       = PETSC_TRUE;
917       sctx.nshift++;
918     }
919   } while (sctx.newshift);
920 
921   /* invert diagonal entries for simpler triangular solves */
922   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
923 
924   PetscCall(PetscFree(rtmp));
925   PetscCall(ISRestoreIndices(isicol, &ic));
926   PetscCall(ISRestoreIndices(isrow, &r));
927 
928   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
929   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
930   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
931   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
932 
933   A->assembled    = PETSC_TRUE;
934   A->preallocated = PETSC_TRUE;
935 
936   PetscCall(PetscLogFlops(A->cmap->n));
937   if (sctx.nshift) {
938     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
939       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
940     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
941       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
942     }
943   }
944   PetscFunctionReturn(PETSC_SUCCESS);
945 }
946 
947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
948 {
949   Mat C;
950 
951   PetscFunctionBegin;
952   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
953   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
954   PetscCall(MatLUFactorNumeric(C, A, info));
955 
956   A->ops->solve          = C->ops->solve;
957   A->ops->solvetranspose = C->ops->solvetranspose;
958 
959   PetscCall(MatHeaderMerge(A, &C));
960   PetscFunctionReturn(PETSC_SUCCESS);
961 }
962 
963 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
964 {
965   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
966   IS                 iscol = a->col, isrow = a->row;
967   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
968   PetscInt           nz;
969   const PetscInt    *rout, *cout, *r, *c;
970   PetscScalar       *x, *tmp, *tmps, sum;
971   const PetscScalar *b;
972   const MatScalar   *aa = a->a, *v;
973 
974   PetscFunctionBegin;
975   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
976 
977   PetscCall(VecGetArrayRead(bb, &b));
978   PetscCall(VecGetArrayWrite(xx, &x));
979   tmp = a->solve_work;
980 
981   PetscCall(ISGetIndices(isrow, &rout));
982   r = rout;
983   PetscCall(ISGetIndices(iscol, &cout));
984   c = cout + (n - 1);
985 
986   /* forward solve the lower triangular */
987   tmp[0] = b[*r++];
988   tmps   = tmp;
989   for (i = 1; i < n; i++) {
990     v   = aa + ai[i];
991     vi  = aj + ai[i];
992     nz  = a->diag[i] - ai[i];
993     sum = b[*r++];
994     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
995     tmp[i] = sum;
996   }
997 
998   /* backward solve the upper triangular */
999   for (i = n - 1; i >= 0; i--) {
1000     v   = aa + a->diag[i] + 1;
1001     vi  = aj + a->diag[i] + 1;
1002     nz  = ai[i + 1] - a->diag[i] - 1;
1003     sum = tmp[i];
1004     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1005     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1006   }
1007 
1008   PetscCall(ISRestoreIndices(isrow, &rout));
1009   PetscCall(ISRestoreIndices(iscol, &cout));
1010   PetscCall(VecRestoreArrayRead(bb, &b));
1011   PetscCall(VecRestoreArrayWrite(xx, &x));
1012   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1017 {
1018   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1019   IS                 iscol = a->col, isrow = a->row;
1020   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1021   PetscInt           nz, neq, ldb, ldx;
1022   const PetscInt    *rout, *cout, *r, *c;
1023   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1024   const PetscScalar *b, *aa  = a->a, *v;
1025   PetscBool          isdense;
1026 
1027   PetscFunctionBegin;
1028   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1029   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1030   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1031   if (X != B) {
1032     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1033     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1034   }
1035   PetscCall(MatDenseGetArrayRead(B, &b));
1036   PetscCall(MatDenseGetLDA(B, &ldb));
1037   PetscCall(MatDenseGetArray(X, &x));
1038   PetscCall(MatDenseGetLDA(X, &ldx));
1039   PetscCall(ISGetIndices(isrow, &rout));
1040   r = rout;
1041   PetscCall(ISGetIndices(iscol, &cout));
1042   c = cout;
1043   for (neq = 0; neq < B->cmap->n; neq++) {
1044     /* forward solve the lower triangular */
1045     tmp[0] = b[r[0]];
1046     tmps   = tmp;
1047     for (i = 1; i < n; i++) {
1048       v   = aa + ai[i];
1049       vi  = aj + ai[i];
1050       nz  = a->diag[i] - ai[i];
1051       sum = b[r[i]];
1052       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1053       tmp[i] = sum;
1054     }
1055     /* backward solve the upper triangular */
1056     for (i = n - 1; i >= 0; i--) {
1057       v   = aa + a->diag[i] + 1;
1058       vi  = aj + a->diag[i] + 1;
1059       nz  = ai[i + 1] - a->diag[i] - 1;
1060       sum = tmp[i];
1061       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1062       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1063     }
1064     b += ldb;
1065     x += ldx;
1066   }
1067   PetscCall(ISRestoreIndices(isrow, &rout));
1068   PetscCall(ISRestoreIndices(iscol, &cout));
1069   PetscCall(MatDenseRestoreArrayRead(B, &b));
1070   PetscCall(MatDenseRestoreArray(X, &x));
1071   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073 }
1074 
1075 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1076 {
1077   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1078   IS                 iscol = a->col, isrow = a->row;
1079   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1080   PetscInt           nz, neq, ldb, ldx;
1081   const PetscInt    *rout, *cout, *r, *c;
1082   PetscScalar       *x, *tmp = a->solve_work, sum;
1083   const PetscScalar *b, *aa  = a->a, *v;
1084   PetscBool          isdense;
1085 
1086   PetscFunctionBegin;
1087   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1088   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1089   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1090   if (X != B) {
1091     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1092     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1093   }
1094   PetscCall(MatDenseGetArrayRead(B, &b));
1095   PetscCall(MatDenseGetLDA(B, &ldb));
1096   PetscCall(MatDenseGetArray(X, &x));
1097   PetscCall(MatDenseGetLDA(X, &ldx));
1098   PetscCall(ISGetIndices(isrow, &rout));
1099   r = rout;
1100   PetscCall(ISGetIndices(iscol, &cout));
1101   c = cout;
1102   for (neq = 0; neq < B->cmap->n; neq++) {
1103     /* forward solve the lower triangular */
1104     tmp[0] = b[r[0]];
1105     v      = aa;
1106     vi     = aj;
1107     for (i = 1; i < n; i++) {
1108       nz  = ai[i + 1] - ai[i];
1109       sum = b[r[i]];
1110       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1111       tmp[i] = sum;
1112       v += nz;
1113       vi += nz;
1114     }
1115     /* backward solve the upper triangular */
1116     for (i = n - 1; i >= 0; i--) {
1117       v   = aa + adiag[i + 1] + 1;
1118       vi  = aj + adiag[i + 1] + 1;
1119       nz  = adiag[i] - adiag[i + 1] - 1;
1120       sum = tmp[i];
1121       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1122       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1123     }
1124     b += ldb;
1125     x += ldx;
1126   }
1127   PetscCall(ISRestoreIndices(isrow, &rout));
1128   PetscCall(ISRestoreIndices(iscol, &cout));
1129   PetscCall(MatDenseRestoreArrayRead(B, &b));
1130   PetscCall(MatDenseRestoreArray(X, &x));
1131   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1136 {
1137   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1138   IS                 iscol = a->col, isrow = a->row;
1139   const PetscInt    *r, *c, *rout, *cout;
1140   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1141   PetscInt           nz, row;
1142   PetscScalar       *x, *tmp, *tmps, sum;
1143   const PetscScalar *b;
1144   const MatScalar   *aa = a->a, *v;
1145 
1146   PetscFunctionBegin;
1147   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1148 
1149   PetscCall(VecGetArrayRead(bb, &b));
1150   PetscCall(VecGetArrayWrite(xx, &x));
1151   tmp = a->solve_work;
1152 
1153   PetscCall(ISGetIndices(isrow, &rout));
1154   r = rout;
1155   PetscCall(ISGetIndices(iscol, &cout));
1156   c = cout + (n - 1);
1157 
1158   /* forward solve the lower triangular */
1159   tmp[0] = b[*r++];
1160   tmps   = tmp;
1161   for (row = 1; row < n; row++) {
1162     i   = rout[row]; /* permuted row */
1163     v   = aa + ai[i];
1164     vi  = aj + ai[i];
1165     nz  = a->diag[i] - ai[i];
1166     sum = b[*r++];
1167     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1168     tmp[row] = sum;
1169   }
1170 
1171   /* backward solve the upper triangular */
1172   for (row = n - 1; row >= 0; row--) {
1173     i   = rout[row]; /* permuted row */
1174     v   = aa + a->diag[i] + 1;
1175     vi  = aj + a->diag[i] + 1;
1176     nz  = ai[i + 1] - a->diag[i] - 1;
1177     sum = tmp[row];
1178     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1179     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1180   }
1181 
1182   PetscCall(ISRestoreIndices(isrow, &rout));
1183   PetscCall(ISRestoreIndices(iscol, &cout));
1184   PetscCall(VecRestoreArrayRead(bb, &b));
1185   PetscCall(VecRestoreArrayWrite(xx, &x));
1186   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1187   PetscFunctionReturn(PETSC_SUCCESS);
1188 }
1189 
1190 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1191 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1192 {
1193   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1194   PetscInt           n  = A->rmap->n;
1195   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1196   PetscScalar       *x;
1197   const PetscScalar *b;
1198   const MatScalar   *aa = a->a;
1199 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1200   PetscInt         adiag_i, i, nz, ai_i;
1201   const PetscInt  *vi;
1202   const MatScalar *v;
1203   PetscScalar      sum;
1204 #endif
1205 
1206   PetscFunctionBegin;
1207   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1208 
1209   PetscCall(VecGetArrayRead(bb, &b));
1210   PetscCall(VecGetArrayWrite(xx, &x));
1211 
1212 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1213   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1214 #else
1215   /* forward solve the lower triangular */
1216   x[0] = b[0];
1217   for (i = 1; i < n; i++) {
1218     ai_i = ai[i];
1219     v    = aa + ai_i;
1220     vi   = aj + ai_i;
1221     nz   = adiag[i] - ai_i;
1222     sum  = b[i];
1223     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1224     x[i] = sum;
1225   }
1226 
1227   /* backward solve the upper triangular */
1228   for (i = n - 1; i >= 0; i--) {
1229     adiag_i = adiag[i];
1230     v       = aa + adiag_i + 1;
1231     vi      = aj + adiag_i + 1;
1232     nz      = ai[i + 1] - adiag_i - 1;
1233     sum     = x[i];
1234     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1235     x[i] = sum * aa[adiag_i];
1236   }
1237 #endif
1238   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1239   PetscCall(VecRestoreArrayRead(bb, &b));
1240   PetscCall(VecRestoreArrayWrite(xx, &x));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1245 {
1246   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1247   IS                 iscol = a->col, isrow = a->row;
1248   PetscInt           i, n                  = A->rmap->n, j;
1249   PetscInt           nz;
1250   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1251   PetscScalar       *x, *tmp, sum;
1252   const PetscScalar *b;
1253   const MatScalar   *aa = a->a, *v;
1254 
1255   PetscFunctionBegin;
1256   if (yy != xx) PetscCall(VecCopy(yy, xx));
1257 
1258   PetscCall(VecGetArrayRead(bb, &b));
1259   PetscCall(VecGetArray(xx, &x));
1260   tmp = a->solve_work;
1261 
1262   PetscCall(ISGetIndices(isrow, &rout));
1263   r = rout;
1264   PetscCall(ISGetIndices(iscol, &cout));
1265   c = cout + (n - 1);
1266 
1267   /* forward solve the lower triangular */
1268   tmp[0] = b[*r++];
1269   for (i = 1; i < n; i++) {
1270     v   = aa + ai[i];
1271     vi  = aj + ai[i];
1272     nz  = a->diag[i] - ai[i];
1273     sum = b[*r++];
1274     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1275     tmp[i] = sum;
1276   }
1277 
1278   /* backward solve the upper triangular */
1279   for (i = n - 1; i >= 0; i--) {
1280     v   = aa + a->diag[i] + 1;
1281     vi  = aj + a->diag[i] + 1;
1282     nz  = ai[i + 1] - a->diag[i] - 1;
1283     sum = tmp[i];
1284     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1285     tmp[i] = sum * aa[a->diag[i]];
1286     x[*c--] += tmp[i];
1287   }
1288 
1289   PetscCall(ISRestoreIndices(isrow, &rout));
1290   PetscCall(ISRestoreIndices(iscol, &cout));
1291   PetscCall(VecRestoreArrayRead(bb, &b));
1292   PetscCall(VecRestoreArray(xx, &x));
1293   PetscCall(PetscLogFlops(2.0 * a->nz));
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 
1297 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1298 {
1299   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1300   IS                 iscol = a->col, isrow = a->row;
1301   PetscInt           i, n                  = A->rmap->n, j;
1302   PetscInt           nz;
1303   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1304   PetscScalar       *x, *tmp, sum;
1305   const PetscScalar *b;
1306   const MatScalar   *aa = a->a, *v;
1307 
1308   PetscFunctionBegin;
1309   if (yy != xx) PetscCall(VecCopy(yy, xx));
1310 
1311   PetscCall(VecGetArrayRead(bb, &b));
1312   PetscCall(VecGetArray(xx, &x));
1313   tmp = a->solve_work;
1314 
1315   PetscCall(ISGetIndices(isrow, &rout));
1316   r = rout;
1317   PetscCall(ISGetIndices(iscol, &cout));
1318   c = cout;
1319 
1320   /* forward solve the lower triangular */
1321   tmp[0] = b[r[0]];
1322   v      = aa;
1323   vi     = aj;
1324   for (i = 1; i < n; i++) {
1325     nz  = ai[i + 1] - ai[i];
1326     sum = b[r[i]];
1327     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1328     tmp[i] = sum;
1329     v += nz;
1330     vi += nz;
1331   }
1332 
1333   /* backward solve the upper triangular */
1334   v  = aa + adiag[n - 1];
1335   vi = aj + adiag[n - 1];
1336   for (i = n - 1; i >= 0; i--) {
1337     nz  = adiag[i] - adiag[i + 1] - 1;
1338     sum = tmp[i];
1339     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1340     tmp[i] = sum * v[nz];
1341     x[c[i]] += tmp[i];
1342     v += nz + 1;
1343     vi += nz + 1;
1344   }
1345 
1346   PetscCall(ISRestoreIndices(isrow, &rout));
1347   PetscCall(ISRestoreIndices(iscol, &cout));
1348   PetscCall(VecRestoreArrayRead(bb, &b));
1349   PetscCall(VecRestoreArray(xx, &x));
1350   PetscCall(PetscLogFlops(2.0 * a->nz));
1351   PetscFunctionReturn(PETSC_SUCCESS);
1352 }
1353 
1354 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1355 {
1356   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1357   IS                 iscol = a->col, isrow = a->row;
1358   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1359   PetscInt           i, n = A->rmap->n, j;
1360   PetscInt           nz;
1361   PetscScalar       *x, *tmp, s1;
1362   const MatScalar   *aa = a->a, *v;
1363   const PetscScalar *b;
1364 
1365   PetscFunctionBegin;
1366   PetscCall(VecGetArrayRead(bb, &b));
1367   PetscCall(VecGetArrayWrite(xx, &x));
1368   tmp = a->solve_work;
1369 
1370   PetscCall(ISGetIndices(isrow, &rout));
1371   r = rout;
1372   PetscCall(ISGetIndices(iscol, &cout));
1373   c = cout;
1374 
1375   /* copy the b into temp work space according to permutation */
1376   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1377 
1378   /* forward solve the U^T */
1379   for (i = 0; i < n; i++) {
1380     v  = aa + diag[i];
1381     vi = aj + diag[i] + 1;
1382     nz = ai[i + 1] - diag[i] - 1;
1383     s1 = tmp[i];
1384     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1385     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1386     tmp[i] = s1;
1387   }
1388 
1389   /* backward solve the L^T */
1390   for (i = n - 1; i >= 0; i--) {
1391     v  = aa + diag[i] - 1;
1392     vi = aj + diag[i] - 1;
1393     nz = diag[i] - ai[i];
1394     s1 = tmp[i];
1395     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1396   }
1397 
1398   /* copy tmp into x according to permutation */
1399   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1400 
1401   PetscCall(ISRestoreIndices(isrow, &rout));
1402   PetscCall(ISRestoreIndices(iscol, &cout));
1403   PetscCall(VecRestoreArrayRead(bb, &b));
1404   PetscCall(VecRestoreArrayWrite(xx, &x));
1405 
1406   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1407   PetscFunctionReturn(PETSC_SUCCESS);
1408 }
1409 
1410 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1411 {
1412   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1413   IS                 iscol = a->col, isrow = a->row;
1414   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1415   PetscInt           i, n = A->rmap->n, j;
1416   PetscInt           nz;
1417   PetscScalar       *x, *tmp, s1;
1418   const MatScalar   *aa = a->a, *v;
1419   const PetscScalar *b;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(VecGetArrayRead(bb, &b));
1423   PetscCall(VecGetArrayWrite(xx, &x));
1424   tmp = a->solve_work;
1425 
1426   PetscCall(ISGetIndices(isrow, &rout));
1427   r = rout;
1428   PetscCall(ISGetIndices(iscol, &cout));
1429   c = cout;
1430 
1431   /* copy the b into temp work space according to permutation */
1432   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1433 
1434   /* forward solve the U^T */
1435   for (i = 0; i < n; i++) {
1436     v  = aa + adiag[i + 1] + 1;
1437     vi = aj + adiag[i + 1] + 1;
1438     nz = adiag[i] - adiag[i + 1] - 1;
1439     s1 = tmp[i];
1440     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1441     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442     tmp[i] = s1;
1443   }
1444 
1445   /* backward solve the L^T */
1446   for (i = n - 1; i >= 0; i--) {
1447     v  = aa + ai[i];
1448     vi = aj + ai[i];
1449     nz = ai[i + 1] - ai[i];
1450     s1 = tmp[i];
1451     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1452   }
1453 
1454   /* copy tmp into x according to permutation */
1455   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1456 
1457   PetscCall(ISRestoreIndices(isrow, &rout));
1458   PetscCall(ISRestoreIndices(iscol, &cout));
1459   PetscCall(VecRestoreArrayRead(bb, &b));
1460   PetscCall(VecRestoreArrayWrite(xx, &x));
1461 
1462   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1467 {
1468   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1469   IS                 iscol = a->col, isrow = a->row;
1470   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1471   PetscInt           i, n = A->rmap->n, j;
1472   PetscInt           nz;
1473   PetscScalar       *x, *tmp, s1;
1474   const MatScalar   *aa = a->a, *v;
1475   const PetscScalar *b;
1476 
1477   PetscFunctionBegin;
1478   if (zz != xx) PetscCall(VecCopy(zz, xx));
1479   PetscCall(VecGetArrayRead(bb, &b));
1480   PetscCall(VecGetArray(xx, &x));
1481   tmp = a->solve_work;
1482 
1483   PetscCall(ISGetIndices(isrow, &rout));
1484   r = rout;
1485   PetscCall(ISGetIndices(iscol, &cout));
1486   c = cout;
1487 
1488   /* copy the b into temp work space according to permutation */
1489   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1490 
1491   /* forward solve the U^T */
1492   for (i = 0; i < n; i++) {
1493     v  = aa + diag[i];
1494     vi = aj + diag[i] + 1;
1495     nz = ai[i + 1] - diag[i] - 1;
1496     s1 = tmp[i];
1497     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1498     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1499     tmp[i] = s1;
1500   }
1501 
1502   /* backward solve the L^T */
1503   for (i = n - 1; i >= 0; i--) {
1504     v  = aa + diag[i] - 1;
1505     vi = aj + diag[i] - 1;
1506     nz = diag[i] - ai[i];
1507     s1 = tmp[i];
1508     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1509   }
1510 
1511   /* copy tmp into x according to permutation */
1512   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1513 
1514   PetscCall(ISRestoreIndices(isrow, &rout));
1515   PetscCall(ISRestoreIndices(iscol, &cout));
1516   PetscCall(VecRestoreArrayRead(bb, &b));
1517   PetscCall(VecRestoreArray(xx, &x));
1518 
1519   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1520   PetscFunctionReturn(PETSC_SUCCESS);
1521 }
1522 
1523 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1524 {
1525   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1526   IS                 iscol = a->col, isrow = a->row;
1527   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1528   PetscInt           i, n = A->rmap->n, j;
1529   PetscInt           nz;
1530   PetscScalar       *x, *tmp, s1;
1531   const MatScalar   *aa = a->a, *v;
1532   const PetscScalar *b;
1533 
1534   PetscFunctionBegin;
1535   if (zz != xx) PetscCall(VecCopy(zz, xx));
1536   PetscCall(VecGetArrayRead(bb, &b));
1537   PetscCall(VecGetArray(xx, &x));
1538   tmp = a->solve_work;
1539 
1540   PetscCall(ISGetIndices(isrow, &rout));
1541   r = rout;
1542   PetscCall(ISGetIndices(iscol, &cout));
1543   c = cout;
1544 
1545   /* copy the b into temp work space according to permutation */
1546   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1547 
1548   /* forward solve the U^T */
1549   for (i = 0; i < n; i++) {
1550     v  = aa + adiag[i + 1] + 1;
1551     vi = aj + adiag[i + 1] + 1;
1552     nz = adiag[i] - adiag[i + 1] - 1;
1553     s1 = tmp[i];
1554     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1555     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1556     tmp[i] = s1;
1557   }
1558 
1559   /* backward solve the L^T */
1560   for (i = n - 1; i >= 0; i--) {
1561     v  = aa + ai[i];
1562     vi = aj + ai[i];
1563     nz = ai[i + 1] - ai[i];
1564     s1 = tmp[i];
1565     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1566   }
1567 
1568   /* copy tmp into x according to permutation */
1569   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1570 
1571   PetscCall(ISRestoreIndices(isrow, &rout));
1572   PetscCall(ISRestoreIndices(iscol, &cout));
1573   PetscCall(VecRestoreArrayRead(bb, &b));
1574   PetscCall(VecRestoreArray(xx, &x));
1575 
1576   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1577   PetscFunctionReturn(PETSC_SUCCESS);
1578 }
1579 
1580 /*
1581    ilu() under revised new data structure.
1582    Factored arrays bj and ba are stored as
1583      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1584 
1585    bi=fact->i is an array of size n+1, in which
1586      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1587      bi[n]:  points to L(n-1,n-1)+1
1588 
1589   bdiag=fact->diag is an array of size n+1,in which
1590      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1591      bdiag[n]: points to entry of U(n-1,0)-1
1592 
1593    U(i,:) contains bdiag[i] as its last entry, i.e.,
1594     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1595 */
1596 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1597 {
1598   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1599   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1600   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1601   IS             isicol;
1602 
1603   PetscFunctionBegin;
1604   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1605   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1606   b = (Mat_SeqAIJ *)(fact)->data;
1607 
1608   /* allocate matrix arrays for new data structure */
1609   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1610 
1611   b->singlemalloc = PETSC_TRUE;
1612   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1613   bdiag = b->diag;
1614 
1615   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1616 
1617   /* set bi and bj with new data structure */
1618   bi = b->i;
1619   bj = b->j;
1620 
1621   /* L part */
1622   bi[0] = 0;
1623   for (i = 0; i < n; i++) {
1624     nz        = adiag[i] - ai[i];
1625     bi[i + 1] = bi[i] + nz;
1626     aj        = a->j + ai[i];
1627     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1628   }
1629 
1630   /* U part */
1631   bdiag[n] = bi[n] - 1;
1632   for (i = n - 1; i >= 0; i--) {
1633     nz = ai[i + 1] - adiag[i] - 1;
1634     aj = a->j + adiag[i] + 1;
1635     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1636     /* diag[i] */
1637     bj[k++]  = i;
1638     bdiag[i] = bdiag[i + 1] + nz + 1;
1639   }
1640 
1641   fact->factortype             = MAT_FACTOR_ILU;
1642   fact->info.factor_mallocs    = 0;
1643   fact->info.fill_ratio_given  = info->fill;
1644   fact->info.fill_ratio_needed = 1.0;
1645   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1646   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1647 
1648   b       = (Mat_SeqAIJ *)(fact)->data;
1649   b->row  = isrow;
1650   b->col  = iscol;
1651   b->icol = isicol;
1652   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1653   PetscCall(PetscObjectReference((PetscObject)isrow));
1654   PetscCall(PetscObjectReference((PetscObject)iscol));
1655   PetscFunctionReturn(PETSC_SUCCESS);
1656 }
1657 
1658 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1659 {
1660   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1661   IS                 isicol;
1662   const PetscInt    *r, *ic;
1663   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1664   PetscInt          *bi, *cols, nnz, *cols_lvl;
1665   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1666   PetscInt           i, levels, diagonal_fill;
1667   PetscBool          col_identity, row_identity, missing;
1668   PetscReal          f;
1669   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1670   PetscBT            lnkbt;
1671   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1672   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1673   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1674 
1675   PetscFunctionBegin;
1676   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1677   PetscCall(MatMissingDiagonal(A, &missing, &i));
1678   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1679 
1680   levels = (PetscInt)info->levels;
1681   PetscCall(ISIdentity(isrow, &row_identity));
1682   PetscCall(ISIdentity(iscol, &col_identity));
1683   if (!levels && row_identity && col_identity) {
1684     /* special case: ilu(0) with natural ordering */
1685     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1686     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1687     PetscFunctionReturn(PETSC_SUCCESS);
1688   }
1689 
1690   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1691   PetscCall(ISGetIndices(isrow, &r));
1692   PetscCall(ISGetIndices(isicol, &ic));
1693 
1694   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1695   PetscCall(PetscMalloc1(n + 1, &bi));
1696   PetscCall(PetscMalloc1(n + 1, &bdiag));
1697   bi[0] = bdiag[0] = 0;
1698   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1699 
1700   /* create a linked list for storing column indices of the active row */
1701   nlnk = n + 1;
1702   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1703 
1704   /* initial FreeSpace size is f*(ai[n]+1) */
1705   f             = info->fill;
1706   diagonal_fill = (PetscInt)info->diagonal_fill;
1707   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1708   current_space = free_space;
1709   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1710   current_space_lvl = free_space_lvl;
1711   for (i = 0; i < n; i++) {
1712     nzi = 0;
1713     /* copy current row into linked list */
1714     nnz = ai[r[i] + 1] - ai[r[i]];
1715     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1716     cols   = aj + ai[r[i]];
1717     lnk[i] = -1; /* marker to indicate if diagonal exists */
1718     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1719     nzi += nlnk;
1720 
1721     /* make sure diagonal entry is included */
1722     if (diagonal_fill && lnk[i] == -1) {
1723       fm = n;
1724       while (lnk[fm] < i) fm = lnk[fm];
1725       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1726       lnk[fm]    = i;
1727       lnk_lvl[i] = 0;
1728       nzi++;
1729       dcount++;
1730     }
1731 
1732     /* add pivot rows into the active row */
1733     nzbd = 0;
1734     prow = lnk[n];
1735     while (prow < i) {
1736       nnz      = bdiag[prow];
1737       cols     = bj_ptr[prow] + nnz + 1;
1738       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1739       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1740       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1741       nzi += nlnk;
1742       prow = lnk[prow];
1743       nzbd++;
1744     }
1745     bdiag[i]  = nzbd;
1746     bi[i + 1] = bi[i] + nzi;
1747     /* if free space is not available, make more free space */
1748     if (current_space->local_remaining < nzi) {
1749       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1750       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1751       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1752       reallocs++;
1753     }
1754 
1755     /* copy data into free_space and free_space_lvl, then initialize lnk */
1756     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1757     bj_ptr[i]    = current_space->array;
1758     bjlvl_ptr[i] = current_space_lvl->array;
1759 
1760     /* make sure the active row i has diagonal entry */
1761     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1762 
1763     current_space->array += nzi;
1764     current_space->local_used += nzi;
1765     current_space->local_remaining -= nzi;
1766     current_space_lvl->array += nzi;
1767     current_space_lvl->local_used += nzi;
1768     current_space_lvl->local_remaining -= nzi;
1769   }
1770 
1771   PetscCall(ISRestoreIndices(isrow, &r));
1772   PetscCall(ISRestoreIndices(isicol, &ic));
1773   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1774   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1775   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1776 
1777   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1778   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1779   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1780 
1781 #if defined(PETSC_USE_INFO)
1782   {
1783     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1784     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1785     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1786     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1787     PetscCall(PetscInfo(A, "for best performance.\n"));
1788     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1789   }
1790 #endif
1791   /* put together the new matrix */
1792   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1793   b = (Mat_SeqAIJ *)(fact)->data;
1794 
1795   b->free_a       = PETSC_TRUE;
1796   b->free_ij      = PETSC_TRUE;
1797   b->singlemalloc = PETSC_FALSE;
1798 
1799   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1800 
1801   b->j    = bj;
1802   b->i    = bi;
1803   b->diag = bdiag;
1804   b->ilen = NULL;
1805   b->imax = NULL;
1806   b->row  = isrow;
1807   b->col  = iscol;
1808   PetscCall(PetscObjectReference((PetscObject)isrow));
1809   PetscCall(PetscObjectReference((PetscObject)iscol));
1810   b->icol = isicol;
1811 
1812   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1813   /* In b structure:  Free imax, ilen, old a, old j.
1814      Allocate bdiag, solve_work, new a, new j */
1815   b->maxnz = b->nz = bdiag[0] + 1;
1816 
1817   (fact)->info.factor_mallocs    = reallocs;
1818   (fact)->info.fill_ratio_given  = f;
1819   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1820   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1821   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1822   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1823   PetscFunctionReturn(PETSC_SUCCESS);
1824 }
1825 
1826 #if 0
1827 // unused
1828 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1829 {
1830   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1831   IS                 isicol;
1832   const PetscInt    *r, *ic;
1833   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1834   PetscInt          *bi, *cols, nnz, *cols_lvl;
1835   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1836   PetscInt           i, levels, diagonal_fill;
1837   PetscBool          col_identity, row_identity;
1838   PetscReal          f;
1839   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1840   PetscBT            lnkbt;
1841   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1842   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1843   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1844   PetscBool          missing;
1845 
1846   PetscFunctionBegin;
1847   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1848   PetscCall(MatMissingDiagonal(A, &missing, &i));
1849   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1850 
1851   f             = info->fill;
1852   levels        = (PetscInt)info->levels;
1853   diagonal_fill = (PetscInt)info->diagonal_fill;
1854 
1855   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1856 
1857   PetscCall(ISIdentity(isrow, &row_identity));
1858   PetscCall(ISIdentity(iscol, &col_identity));
1859   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1860     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1861 
1862     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1863     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1864     fact->factortype               = MAT_FACTOR_ILU;
1865     (fact)->info.factor_mallocs    = 0;
1866     (fact)->info.fill_ratio_given  = info->fill;
1867     (fact)->info.fill_ratio_needed = 1.0;
1868 
1869     b       = (Mat_SeqAIJ *)(fact)->data;
1870     b->row  = isrow;
1871     b->col  = iscol;
1872     b->icol = isicol;
1873     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1874     PetscCall(PetscObjectReference((PetscObject)isrow));
1875     PetscCall(PetscObjectReference((PetscObject)iscol));
1876     PetscFunctionReturn(PETSC_SUCCESS);
1877   }
1878 
1879   PetscCall(ISGetIndices(isrow, &r));
1880   PetscCall(ISGetIndices(isicol, &ic));
1881 
1882   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1883   PetscCall(PetscMalloc1(n + 1, &bi));
1884   PetscCall(PetscMalloc1(n + 1, &bdiag));
1885   bi[0] = bdiag[0] = 0;
1886 
1887   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1888 
1889   /* create a linked list for storing column indices of the active row */
1890   nlnk = n + 1;
1891   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1892 
1893   /* initial FreeSpace size is f*(ai[n]+1) */
1894   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1895   current_space = free_space;
1896   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1897   current_space_lvl = free_space_lvl;
1898 
1899   for (i = 0; i < n; i++) {
1900     nzi = 0;
1901     /* copy current row into linked list */
1902     nnz = ai[r[i] + 1] - ai[r[i]];
1903     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1904     cols   = aj + ai[r[i]];
1905     lnk[i] = -1; /* marker to indicate if diagonal exists */
1906     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1907     nzi += nlnk;
1908 
1909     /* make sure diagonal entry is included */
1910     if (diagonal_fill && lnk[i] == -1) {
1911       fm = n;
1912       while (lnk[fm] < i) fm = lnk[fm];
1913       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1914       lnk[fm]    = i;
1915       lnk_lvl[i] = 0;
1916       nzi++;
1917       dcount++;
1918     }
1919 
1920     /* add pivot rows into the active row */
1921     nzbd = 0;
1922     prow = lnk[n];
1923     while (prow < i) {
1924       nnz      = bdiag[prow];
1925       cols     = bj_ptr[prow] + nnz + 1;
1926       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1927       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1928       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1929       nzi += nlnk;
1930       prow = lnk[prow];
1931       nzbd++;
1932     }
1933     bdiag[i]  = nzbd;
1934     bi[i + 1] = bi[i] + nzi;
1935 
1936     /* if free space is not available, make more free space */
1937     if (current_space->local_remaining < nzi) {
1938       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1939       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1940       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1941       reallocs++;
1942     }
1943 
1944     /* copy data into free_space and free_space_lvl, then initialize lnk */
1945     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1946     bj_ptr[i]    = current_space->array;
1947     bjlvl_ptr[i] = current_space_lvl->array;
1948 
1949     /* make sure the active row i has diagonal entry */
1950     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1951 
1952     current_space->array += nzi;
1953     current_space->local_used += nzi;
1954     current_space->local_remaining -= nzi;
1955     current_space_lvl->array += nzi;
1956     current_space_lvl->local_used += nzi;
1957     current_space_lvl->local_remaining -= nzi;
1958   }
1959 
1960   PetscCall(ISRestoreIndices(isrow, &r));
1961   PetscCall(ISRestoreIndices(isicol, &ic));
1962 
1963   /* destroy list of free space and other temporary arrays */
1964   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1965   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1966   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1967   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1968   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1969 
1970   #if defined(PETSC_USE_INFO)
1971   {
1972     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1973     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1974     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1975     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1976     PetscCall(PetscInfo(A, "for best performance.\n"));
1977     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1978   }
1979   #endif
1980 
1981   /* put together the new matrix */
1982   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1983   b = (Mat_SeqAIJ *)(fact)->data;
1984 
1985   b->free_a       = PETSC_TRUE;
1986   b->free_ij      = PETSC_TRUE;
1987   b->singlemalloc = PETSC_FALSE;
1988 
1989   PetscCall(PetscMalloc1(bi[n], &b->a));
1990   b->j = bj;
1991   b->i = bi;
1992   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1993   b->diag = bdiag;
1994   b->ilen = NULL;
1995   b->imax = NULL;
1996   b->row  = isrow;
1997   b->col  = iscol;
1998   PetscCall(PetscObjectReference((PetscObject)isrow));
1999   PetscCall(PetscObjectReference((PetscObject)iscol));
2000   b->icol = isicol;
2001   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2002   /* In b structure:  Free imax, ilen, old a, old j.
2003      Allocate bdiag, solve_work, new a, new j */
2004   b->maxnz = b->nz = bi[n];
2005 
2006   (fact)->info.factor_mallocs    = reallocs;
2007   (fact)->info.fill_ratio_given  = f;
2008   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2009   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2010   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2011   PetscFunctionReturn(PETSC_SUCCESS);
2012 }
2013 #endif
2014 
2015 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2016 {
2017   Mat             C  = B;
2018   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2019   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2020   IS              ip = b->row, iip = b->icol;
2021   const PetscInt *rip, *riip;
2022   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2023   PetscInt       *ai = a->i, *aj = a->j;
2024   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2025   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2026   PetscBool       perm_identity;
2027   FactorShiftCtx  sctx;
2028   PetscReal       rs;
2029   MatScalar       d, *v;
2030 
2031   PetscFunctionBegin;
2032   /* MatPivotSetUp(): initialize shift context sctx */
2033   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2034 
2035   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2036     sctx.shift_top = info->zeropivot;
2037     for (i = 0; i < mbs; i++) {
2038       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2039       d  = (aa)[a->diag[i]];
2040       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2041       v  = aa + ai[i];
2042       nz = ai[i + 1] - ai[i];
2043       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2044       if (rs > sctx.shift_top) sctx.shift_top = rs;
2045     }
2046     sctx.shift_top *= 1.1;
2047     sctx.nshift_max = 5;
2048     sctx.shift_lo   = 0.;
2049     sctx.shift_hi   = 1.;
2050   }
2051 
2052   PetscCall(ISGetIndices(ip, &rip));
2053   PetscCall(ISGetIndices(iip, &riip));
2054 
2055   /* allocate working arrays
2056      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2057      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2058   */
2059   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2060 
2061   do {
2062     sctx.newshift = PETSC_FALSE;
2063 
2064     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2065     if (mbs) il[0] = 0;
2066 
2067     for (k = 0; k < mbs; k++) {
2068       /* zero rtmp */
2069       nz    = bi[k + 1] - bi[k];
2070       bjtmp = bj + bi[k];
2071       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2072 
2073       /* load in initial unfactored row */
2074       bval = ba + bi[k];
2075       jmin = ai[rip[k]];
2076       jmax = ai[rip[k] + 1];
2077       for (j = jmin; j < jmax; j++) {
2078         col = riip[aj[j]];
2079         if (col >= k) { /* only take upper triangular entry */
2080           rtmp[col] = aa[j];
2081           *bval++   = 0.0; /* for in-place factorization */
2082         }
2083       }
2084       /* shift the diagonal of the matrix: ZeropivotApply() */
2085       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2086 
2087       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2088       dk = rtmp[k];
2089       i  = c2r[k]; /* first row to be added to k_th row  */
2090 
2091       while (i < k) {
2092         nexti = c2r[i]; /* next row to be added to k_th row */
2093 
2094         /* compute multiplier, update diag(k) and U(i,k) */
2095         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2096         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2097         dk += uikdi * ba[ili];           /* update diag[k] */
2098         ba[ili] = uikdi;                 /* -U(i,k) */
2099 
2100         /* add multiple of row i to k-th row */
2101         jmin = ili + 1;
2102         jmax = bi[i + 1];
2103         if (jmin < jmax) {
2104           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2105           /* update il and c2r for row i */
2106           il[i]  = jmin;
2107           j      = bj[jmin];
2108           c2r[i] = c2r[j];
2109           c2r[j] = i;
2110         }
2111         i = nexti;
2112       }
2113 
2114       /* copy data into U(k,:) */
2115       rs   = 0.0;
2116       jmin = bi[k];
2117       jmax = bi[k + 1] - 1;
2118       if (jmin < jmax) {
2119         for (j = jmin; j < jmax; j++) {
2120           col   = bj[j];
2121           ba[j] = rtmp[col];
2122           rs += PetscAbsScalar(ba[j]);
2123         }
2124         /* add the k-th row into il and c2r */
2125         il[k]  = jmin;
2126         i      = bj[jmin];
2127         c2r[k] = c2r[i];
2128         c2r[i] = k;
2129       }
2130 
2131       /* MatPivotCheck() */
2132       sctx.rs = rs;
2133       sctx.pv = dk;
2134       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2135       if (sctx.newshift) break;
2136       dk = sctx.pv;
2137 
2138       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2139     }
2140   } while (sctx.newshift);
2141 
2142   PetscCall(PetscFree3(rtmp, il, c2r));
2143   PetscCall(ISRestoreIndices(ip, &rip));
2144   PetscCall(ISRestoreIndices(iip, &riip));
2145 
2146   PetscCall(ISIdentity(ip, &perm_identity));
2147   if (perm_identity) {
2148     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2149     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2150     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2151     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2152   } else {
2153     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2154     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2155     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2156     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2157   }
2158 
2159   C->assembled    = PETSC_TRUE;
2160   C->preallocated = PETSC_TRUE;
2161 
2162   PetscCall(PetscLogFlops(C->rmap->n));
2163 
2164   /* MatPivotView() */
2165   if (sctx.nshift) {
2166     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2167       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2168     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2169       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2170     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2171       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2172     }
2173   }
2174   PetscFunctionReturn(PETSC_SUCCESS);
2175 }
2176 
2177 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2178 {
2179   Mat             C  = B;
2180   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2181   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2182   IS              ip = b->row, iip = b->icol;
2183   const PetscInt *rip, *riip;
2184   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2185   PetscInt       *ai = a->i, *aj = a->j;
2186   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2187   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2188   PetscBool       perm_identity;
2189   FactorShiftCtx  sctx;
2190   PetscReal       rs;
2191   MatScalar       d, *v;
2192 
2193   PetscFunctionBegin;
2194   /* MatPivotSetUp(): initialize shift context sctx */
2195   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2196 
2197   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2198     sctx.shift_top = info->zeropivot;
2199     for (i = 0; i < mbs; i++) {
2200       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2201       d  = (aa)[a->diag[i]];
2202       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2203       v  = aa + ai[i];
2204       nz = ai[i + 1] - ai[i];
2205       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2206       if (rs > sctx.shift_top) sctx.shift_top = rs;
2207     }
2208     sctx.shift_top *= 1.1;
2209     sctx.nshift_max = 5;
2210     sctx.shift_lo   = 0.;
2211     sctx.shift_hi   = 1.;
2212   }
2213 
2214   PetscCall(ISGetIndices(ip, &rip));
2215   PetscCall(ISGetIndices(iip, &riip));
2216 
2217   /* initialization */
2218   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2219 
2220   do {
2221     sctx.newshift = PETSC_FALSE;
2222 
2223     for (i = 0; i < mbs; i++) jl[i] = mbs;
2224     il[0] = 0;
2225 
2226     for (k = 0; k < mbs; k++) {
2227       /* zero rtmp */
2228       nz    = bi[k + 1] - bi[k];
2229       bjtmp = bj + bi[k];
2230       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2231 
2232       bval = ba + bi[k];
2233       /* initialize k-th row by the perm[k]-th row of A */
2234       jmin = ai[rip[k]];
2235       jmax = ai[rip[k] + 1];
2236       for (j = jmin; j < jmax; j++) {
2237         col = riip[aj[j]];
2238         if (col >= k) { /* only take upper triangular entry */
2239           rtmp[col] = aa[j];
2240           *bval++   = 0.0; /* for in-place factorization */
2241         }
2242       }
2243       /* shift the diagonal of the matrix */
2244       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2245 
2246       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2247       dk = rtmp[k];
2248       i  = jl[k]; /* first row to be added to k_th row  */
2249 
2250       while (i < k) {
2251         nexti = jl[i]; /* next row to be added to k_th row */
2252 
2253         /* compute multiplier, update diag(k) and U(i,k) */
2254         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2255         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2256         dk += uikdi * ba[ili];
2257         ba[ili] = uikdi; /* -U(i,k) */
2258 
2259         /* add multiple of row i to k-th row */
2260         jmin = ili + 1;
2261         jmax = bi[i + 1];
2262         if (jmin < jmax) {
2263           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2264           /* update il and jl for row i */
2265           il[i] = jmin;
2266           j     = bj[jmin];
2267           jl[i] = jl[j];
2268           jl[j] = i;
2269         }
2270         i = nexti;
2271       }
2272 
2273       /* shift the diagonals when zero pivot is detected */
2274       /* compute rs=sum of abs(off-diagonal) */
2275       rs   = 0.0;
2276       jmin = bi[k] + 1;
2277       nz   = bi[k + 1] - jmin;
2278       bcol = bj + jmin;
2279       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2280 
2281       sctx.rs = rs;
2282       sctx.pv = dk;
2283       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2284       if (sctx.newshift) break;
2285       dk = sctx.pv;
2286 
2287       /* copy data into U(k,:) */
2288       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2289       jmin      = bi[k] + 1;
2290       jmax      = bi[k + 1];
2291       if (jmin < jmax) {
2292         for (j = jmin; j < jmax; j++) {
2293           col   = bj[j];
2294           ba[j] = rtmp[col];
2295         }
2296         /* add the k-th row into il and jl */
2297         il[k] = jmin;
2298         i     = bj[jmin];
2299         jl[k] = jl[i];
2300         jl[i] = k;
2301       }
2302     }
2303   } while (sctx.newshift);
2304 
2305   PetscCall(PetscFree3(rtmp, il, jl));
2306   PetscCall(ISRestoreIndices(ip, &rip));
2307   PetscCall(ISRestoreIndices(iip, &riip));
2308 
2309   PetscCall(ISIdentity(ip, &perm_identity));
2310   if (perm_identity) {
2311     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2312     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2313     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2314     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2315   } else {
2316     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2317     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2318     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2319     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2320   }
2321 
2322   C->assembled    = PETSC_TRUE;
2323   C->preallocated = PETSC_TRUE;
2324 
2325   PetscCall(PetscLogFlops(C->rmap->n));
2326   if (sctx.nshift) {
2327     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2328       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2329     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2330       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2331     }
2332   }
2333   PetscFunctionReturn(PETSC_SUCCESS);
2334 }
2335 
2336 /*
2337    icc() under revised new data structure.
2338    Factored arrays bj and ba are stored as
2339      U(0,:),...,U(i,:),U(n-1,:)
2340 
2341    ui=fact->i is an array of size n+1, in which
2342    ui+
2343      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2344      ui[n]:  points to U(n-1,n-1)+1
2345 
2346   udiag=fact->diag is an array of size n,in which
2347      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2348 
2349    U(i,:) contains udiag[i] as its last entry, i.e.,
2350     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2351 */
2352 
2353 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2354 {
2355   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2356   Mat_SeqSBAIJ      *b;
2357   PetscBool          perm_identity, missing;
2358   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2359   const PetscInt    *rip, *riip;
2360   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2361   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2362   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2363   PetscReal          fill = info->fill, levels = info->levels;
2364   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2365   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2366   PetscBT            lnkbt;
2367   IS                 iperm;
2368 
2369   PetscFunctionBegin;
2370   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2371   PetscCall(MatMissingDiagonal(A, &missing, &d));
2372   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2373   PetscCall(ISIdentity(perm, &perm_identity));
2374   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2375 
2376   PetscCall(PetscMalloc1(am + 1, &ui));
2377   PetscCall(PetscMalloc1(am + 1, &udiag));
2378   ui[0] = 0;
2379 
2380   /* ICC(0) without matrix ordering: simply rearrange column indices */
2381   if (!levels && perm_identity) {
2382     for (i = 0; i < am; i++) {
2383       ncols     = ai[i + 1] - a->diag[i];
2384       ui[i + 1] = ui[i] + ncols;
2385       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2386     }
2387     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2388     cols = uj;
2389     for (i = 0; i < am; i++) {
2390       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2391       ncols = ai[i + 1] - a->diag[i] - 1;
2392       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2393       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2394     }
2395   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2396     PetscCall(ISGetIndices(iperm, &riip));
2397     PetscCall(ISGetIndices(perm, &rip));
2398 
2399     /* initialization */
2400     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2401 
2402     /* jl: linked list for storing indices of the pivot rows
2403        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2404     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2405     for (i = 0; i < am; i++) {
2406       jl[i] = am;
2407       il[i] = 0;
2408     }
2409 
2410     /* create and initialize a linked list for storing column indices of the active row k */
2411     nlnk = am + 1;
2412     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2413 
2414     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2415     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2416     current_space = free_space;
2417     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2418     current_space_lvl = free_space_lvl;
2419 
2420     for (k = 0; k < am; k++) { /* for each active row k */
2421       /* initialize lnk by the column indices of row rip[k] of A */
2422       nzk   = 0;
2423       ncols = ai[rip[k] + 1] - ai[rip[k]];
2424       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2425       ncols_upper = 0;
2426       for (j = 0; j < ncols; j++) {
2427         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2428         if (riip[i] >= k) {         /* only take upper triangular entry */
2429           ajtmp[ncols_upper] = i;
2430           ncols_upper++;
2431         }
2432       }
2433       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2434       nzk += nlnk;
2435 
2436       /* update lnk by computing fill-in for each pivot row to be merged in */
2437       prow = jl[k]; /* 1st pivot row */
2438 
2439       while (prow < k) {
2440         nextprow = jl[prow];
2441 
2442         /* merge prow into k-th row */
2443         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2444         jmax  = ui[prow + 1];
2445         ncols = jmax - jmin;
2446         i     = jmin - ui[prow];
2447         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2448         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2449         j     = *(uj - 1);
2450         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2451         nzk += nlnk;
2452 
2453         /* update il and jl for prow */
2454         if (jmin < jmax) {
2455           il[prow] = jmin;
2456           j        = *cols;
2457           jl[prow] = jl[j];
2458           jl[j]    = prow;
2459         }
2460         prow = nextprow;
2461       }
2462 
2463       /* if free space is not available, make more free space */
2464       if (current_space->local_remaining < nzk) {
2465         i = am - k + 1;                                    /* num of unfactored rows */
2466         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2467         PetscCall(PetscFreeSpaceGet(i, &current_space));
2468         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2469         reallocs++;
2470       }
2471 
2472       /* copy data into free_space and free_space_lvl, then initialize lnk */
2473       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2474       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2475 
2476       /* add the k-th row into il and jl */
2477       if (nzk > 1) {
2478         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2479         jl[k] = jl[i];
2480         jl[i] = k;
2481         il[k] = ui[k] + 1;
2482       }
2483       uj_ptr[k]     = current_space->array;
2484       uj_lvl_ptr[k] = current_space_lvl->array;
2485 
2486       current_space->array += nzk;
2487       current_space->local_used += nzk;
2488       current_space->local_remaining -= nzk;
2489 
2490       current_space_lvl->array += nzk;
2491       current_space_lvl->local_used += nzk;
2492       current_space_lvl->local_remaining -= nzk;
2493 
2494       ui[k + 1] = ui[k] + nzk;
2495     }
2496 
2497     PetscCall(ISRestoreIndices(perm, &rip));
2498     PetscCall(ISRestoreIndices(iperm, &riip));
2499     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2500     PetscCall(PetscFree(ajtmp));
2501 
2502     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2503     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2504     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2505     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2506     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2507 
2508   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2509 
2510   /* put together the new matrix in MATSEQSBAIJ format */
2511   b               = (Mat_SeqSBAIJ *)(fact)->data;
2512   b->singlemalloc = PETSC_FALSE;
2513 
2514   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2515 
2516   b->j         = uj;
2517   b->i         = ui;
2518   b->diag      = udiag;
2519   b->free_diag = PETSC_TRUE;
2520   b->ilen      = NULL;
2521   b->imax      = NULL;
2522   b->row       = perm;
2523   b->col       = perm;
2524   PetscCall(PetscObjectReference((PetscObject)perm));
2525   PetscCall(PetscObjectReference((PetscObject)perm));
2526   b->icol          = iperm;
2527   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2528 
2529   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2530 
2531   b->maxnz = b->nz = ui[am];
2532   b->free_a        = PETSC_TRUE;
2533   b->free_ij       = PETSC_TRUE;
2534 
2535   fact->info.factor_mallocs   = reallocs;
2536   fact->info.fill_ratio_given = fill;
2537   if (ai[am] != 0) {
2538     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2539     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2540   } else {
2541     fact->info.fill_ratio_needed = 0.0;
2542   }
2543 #if defined(PETSC_USE_INFO)
2544   if (ai[am] != 0) {
2545     PetscReal af = fact->info.fill_ratio_needed;
2546     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2547     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2548     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2549   } else {
2550     PetscCall(PetscInfo(A, "Empty matrix\n"));
2551   }
2552 #endif
2553   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2554   PetscFunctionReturn(PETSC_SUCCESS);
2555 }
2556 
2557 #if 0
2558 // unused
2559 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2560 {
2561   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2562   Mat_SeqSBAIJ      *b;
2563   PetscBool          perm_identity, missing;
2564   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2565   const PetscInt    *rip, *riip;
2566   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2567   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2568   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2569   PetscReal          fill = info->fill, levels = info->levels;
2570   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2571   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2572   PetscBT            lnkbt;
2573   IS                 iperm;
2574 
2575   PetscFunctionBegin;
2576   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2577   PetscCall(MatMissingDiagonal(A, &missing, &d));
2578   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2579   PetscCall(ISIdentity(perm, &perm_identity));
2580   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2581 
2582   PetscCall(PetscMalloc1(am + 1, &ui));
2583   PetscCall(PetscMalloc1(am + 1, &udiag));
2584   ui[0] = 0;
2585 
2586   /* ICC(0) without matrix ordering: simply copies fill pattern */
2587   if (!levels && perm_identity) {
2588     for (i = 0; i < am; i++) {
2589       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2590       udiag[i]  = ui[i];
2591     }
2592     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2593     cols = uj;
2594     for (i = 0; i < am; i++) {
2595       aj    = a->j + a->diag[i];
2596       ncols = ui[i + 1] - ui[i];
2597       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2598     }
2599   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2600     PetscCall(ISGetIndices(iperm, &riip));
2601     PetscCall(ISGetIndices(perm, &rip));
2602 
2603     /* initialization */
2604     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2605 
2606     /* jl: linked list for storing indices of the pivot rows
2607        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2608     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2609     for (i = 0; i < am; i++) {
2610       jl[i] = am;
2611       il[i] = 0;
2612     }
2613 
2614     /* create and initialize a linked list for storing column indices of the active row k */
2615     nlnk = am + 1;
2616     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2617 
2618     /* initial FreeSpace size is fill*(ai[am]+1) */
2619     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2620     current_space = free_space;
2621     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2622     current_space_lvl = free_space_lvl;
2623 
2624     for (k = 0; k < am; k++) { /* for each active row k */
2625       /* initialize lnk by the column indices of row rip[k] of A */
2626       nzk   = 0;
2627       ncols = ai[rip[k] + 1] - ai[rip[k]];
2628       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2629       ncols_upper = 0;
2630       for (j = 0; j < ncols; j++) {
2631         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2632         if (riip[i] >= k) {         /* only take upper triangular entry */
2633           ajtmp[ncols_upper] = i;
2634           ncols_upper++;
2635         }
2636       }
2637       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2638       nzk += nlnk;
2639 
2640       /* update lnk by computing fill-in for each pivot row to be merged in */
2641       prow = jl[k]; /* 1st pivot row */
2642 
2643       while (prow < k) {
2644         nextprow = jl[prow];
2645 
2646         /* merge prow into k-th row */
2647         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2648         jmax  = ui[prow + 1];
2649         ncols = jmax - jmin;
2650         i     = jmin - ui[prow];
2651         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2652         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2653         j     = *(uj - 1);
2654         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2655         nzk += nlnk;
2656 
2657         /* update il and jl for prow */
2658         if (jmin < jmax) {
2659           il[prow] = jmin;
2660           j        = *cols;
2661           jl[prow] = jl[j];
2662           jl[j]    = prow;
2663         }
2664         prow = nextprow;
2665       }
2666 
2667       /* if free space is not available, make more free space */
2668       if (current_space->local_remaining < nzk) {
2669         i = am - k + 1;                                    /* num of unfactored rows */
2670         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2671         PetscCall(PetscFreeSpaceGet(i, &current_space));
2672         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2673         reallocs++;
2674       }
2675 
2676       /* copy data into free_space and free_space_lvl, then initialize lnk */
2677       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2678       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2679 
2680       /* add the k-th row into il and jl */
2681       if (nzk > 1) {
2682         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2683         jl[k] = jl[i];
2684         jl[i] = k;
2685         il[k] = ui[k] + 1;
2686       }
2687       uj_ptr[k]     = current_space->array;
2688       uj_lvl_ptr[k] = current_space_lvl->array;
2689 
2690       current_space->array += nzk;
2691       current_space->local_used += nzk;
2692       current_space->local_remaining -= nzk;
2693 
2694       current_space_lvl->array += nzk;
2695       current_space_lvl->local_used += nzk;
2696       current_space_lvl->local_remaining -= nzk;
2697 
2698       ui[k + 1] = ui[k] + nzk;
2699     }
2700 
2701   #if defined(PETSC_USE_INFO)
2702     if (ai[am] != 0) {
2703       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2704       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2705       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2706       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2707     } else {
2708       PetscCall(PetscInfo(A, "Empty matrix\n"));
2709     }
2710   #endif
2711 
2712     PetscCall(ISRestoreIndices(perm, &rip));
2713     PetscCall(ISRestoreIndices(iperm, &riip));
2714     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2715     PetscCall(PetscFree(ajtmp));
2716 
2717     /* destroy list of free space and other temporary array(s) */
2718     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2719     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2720     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2721     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2722 
2723   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2724 
2725   /* put together the new matrix in MATSEQSBAIJ format */
2726 
2727   b               = (Mat_SeqSBAIJ *)fact->data;
2728   b->singlemalloc = PETSC_FALSE;
2729 
2730   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2731 
2732   b->j         = uj;
2733   b->i         = ui;
2734   b->diag      = udiag;
2735   b->free_diag = PETSC_TRUE;
2736   b->ilen      = NULL;
2737   b->imax      = NULL;
2738   b->row       = perm;
2739   b->col       = perm;
2740 
2741   PetscCall(PetscObjectReference((PetscObject)perm));
2742   PetscCall(PetscObjectReference((PetscObject)perm));
2743 
2744   b->icol          = iperm;
2745   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2746   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2747   b->maxnz = b->nz = ui[am];
2748   b->free_a        = PETSC_TRUE;
2749   b->free_ij       = PETSC_TRUE;
2750 
2751   fact->info.factor_mallocs   = reallocs;
2752   fact->info.fill_ratio_given = fill;
2753   if (ai[am] != 0) {
2754     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2755   } else {
2756     fact->info.fill_ratio_needed = 0.0;
2757   }
2758   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2759   PetscFunctionReturn(PETSC_SUCCESS);
2760 }
2761 #endif
2762 
2763 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2764 {
2765   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2766   Mat_SeqSBAIJ      *b;
2767   PetscBool          perm_identity, missing;
2768   PetscReal          fill = info->fill;
2769   const PetscInt    *rip, *riip;
2770   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2771   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2772   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2773   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2774   PetscBT            lnkbt;
2775   IS                 iperm;
2776 
2777   PetscFunctionBegin;
2778   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2779   PetscCall(MatMissingDiagonal(A, &missing, &i));
2780   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2781 
2782   /* check whether perm is the identity mapping */
2783   PetscCall(ISIdentity(perm, &perm_identity));
2784   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2785   PetscCall(ISGetIndices(iperm, &riip));
2786   PetscCall(ISGetIndices(perm, &rip));
2787 
2788   /* initialization */
2789   PetscCall(PetscMalloc1(am + 1, &ui));
2790   PetscCall(PetscMalloc1(am + 1, &udiag));
2791   ui[0] = 0;
2792 
2793   /* jl: linked list for storing indices of the pivot rows
2794      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2795   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2796   for (i = 0; i < am; i++) {
2797     jl[i] = am;
2798     il[i] = 0;
2799   }
2800 
2801   /* create and initialize a linked list for storing column indices of the active row k */
2802   nlnk = am + 1;
2803   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2804 
2805   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2806   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2807   current_space = free_space;
2808 
2809   for (k = 0; k < am; k++) { /* for each active row k */
2810     /* initialize lnk by the column indices of row rip[k] of A */
2811     nzk   = 0;
2812     ncols = ai[rip[k] + 1] - ai[rip[k]];
2813     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2814     ncols_upper = 0;
2815     for (j = 0; j < ncols; j++) {
2816       i = riip[*(aj + ai[rip[k]] + j)];
2817       if (i >= k) { /* only take upper triangular entry */
2818         cols[ncols_upper] = i;
2819         ncols_upper++;
2820       }
2821     }
2822     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2823     nzk += nlnk;
2824 
2825     /* update lnk by computing fill-in for each pivot row to be merged in */
2826     prow = jl[k]; /* 1st pivot row */
2827 
2828     while (prow < k) {
2829       nextprow = jl[prow];
2830       /* merge prow into k-th row */
2831       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2832       jmax   = ui[prow + 1];
2833       ncols  = jmax - jmin;
2834       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2835       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2836       nzk += nlnk;
2837 
2838       /* update il and jl for prow */
2839       if (jmin < jmax) {
2840         il[prow] = jmin;
2841         j        = *uj_ptr;
2842         jl[prow] = jl[j];
2843         jl[j]    = prow;
2844       }
2845       prow = nextprow;
2846     }
2847 
2848     /* if free space is not available, make more free space */
2849     if (current_space->local_remaining < nzk) {
2850       i = am - k + 1;                                    /* num of unfactored rows */
2851       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2852       PetscCall(PetscFreeSpaceGet(i, &current_space));
2853       reallocs++;
2854     }
2855 
2856     /* copy data into free space, then initialize lnk */
2857     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2858 
2859     /* add the k-th row into il and jl */
2860     if (nzk > 1) {
2861       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2862       jl[k] = jl[i];
2863       jl[i] = k;
2864       il[k] = ui[k] + 1;
2865     }
2866     ui_ptr[k] = current_space->array;
2867 
2868     current_space->array += nzk;
2869     current_space->local_used += nzk;
2870     current_space->local_remaining -= nzk;
2871 
2872     ui[k + 1] = ui[k] + nzk;
2873   }
2874 
2875   PetscCall(ISRestoreIndices(perm, &rip));
2876   PetscCall(ISRestoreIndices(iperm, &riip));
2877   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2878 
2879   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2880   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2881   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2882   PetscCall(PetscLLDestroy(lnk, lnkbt));
2883 
2884   /* put together the new matrix in MATSEQSBAIJ format */
2885 
2886   b               = (Mat_SeqSBAIJ *)fact->data;
2887   b->singlemalloc = PETSC_FALSE;
2888   b->free_a       = PETSC_TRUE;
2889   b->free_ij      = PETSC_TRUE;
2890 
2891   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2892 
2893   b->j         = uj;
2894   b->i         = ui;
2895   b->diag      = udiag;
2896   b->free_diag = PETSC_TRUE;
2897   b->ilen      = NULL;
2898   b->imax      = NULL;
2899   b->row       = perm;
2900   b->col       = perm;
2901 
2902   PetscCall(PetscObjectReference((PetscObject)perm));
2903   PetscCall(PetscObjectReference((PetscObject)perm));
2904 
2905   b->icol          = iperm;
2906   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2907 
2908   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2909 
2910   b->maxnz = b->nz = ui[am];
2911 
2912   fact->info.factor_mallocs   = reallocs;
2913   fact->info.fill_ratio_given = fill;
2914   if (ai[am] != 0) {
2915     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2916     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2917   } else {
2918     fact->info.fill_ratio_needed = 0.0;
2919   }
2920 #if defined(PETSC_USE_INFO)
2921   if (ai[am] != 0) {
2922     PetscReal af = fact->info.fill_ratio_needed;
2923     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2924     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2925     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2926   } else {
2927     PetscCall(PetscInfo(A, "Empty matrix\n"));
2928   }
2929 #endif
2930   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2931   PetscFunctionReturn(PETSC_SUCCESS);
2932 }
2933 
2934 #if 0
2935 // unused
2936 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2937 {
2938   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2939   Mat_SeqSBAIJ      *b;
2940   PetscBool          perm_identity, missing;
2941   PetscReal          fill = info->fill;
2942   const PetscInt    *rip, *riip;
2943   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2944   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2945   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2946   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2947   PetscBT            lnkbt;
2948   IS                 iperm;
2949 
2950   PetscFunctionBegin;
2951   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2952   PetscCall(MatMissingDiagonal(A, &missing, &i));
2953   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2954 
2955   /* check whether perm is the identity mapping */
2956   PetscCall(ISIdentity(perm, &perm_identity));
2957   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2958   PetscCall(ISGetIndices(iperm, &riip));
2959   PetscCall(ISGetIndices(perm, &rip));
2960 
2961   /* initialization */
2962   PetscCall(PetscMalloc1(am + 1, &ui));
2963   ui[0] = 0;
2964 
2965   /* jl: linked list for storing indices of the pivot rows
2966      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2967   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2968   for (i = 0; i < am; i++) {
2969     jl[i] = am;
2970     il[i] = 0;
2971   }
2972 
2973   /* create and initialize a linked list for storing column indices of the active row k */
2974   nlnk = am + 1;
2975   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2976 
2977   /* initial FreeSpace size is fill*(ai[am]+1) */
2978   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2979   current_space = free_space;
2980 
2981   for (k = 0; k < am; k++) { /* for each active row k */
2982     /* initialize lnk by the column indices of row rip[k] of A */
2983     nzk   = 0;
2984     ncols = ai[rip[k] + 1] - ai[rip[k]];
2985     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2986     ncols_upper = 0;
2987     for (j = 0; j < ncols; j++) {
2988       i = riip[*(aj + ai[rip[k]] + j)];
2989       if (i >= k) { /* only take upper triangular entry */
2990         cols[ncols_upper] = i;
2991         ncols_upper++;
2992       }
2993     }
2994     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2995     nzk += nlnk;
2996 
2997     /* update lnk by computing fill-in for each pivot row to be merged in */
2998     prow = jl[k]; /* 1st pivot row */
2999 
3000     while (prow < k) {
3001       nextprow = jl[prow];
3002       /* merge prow into k-th row */
3003       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3004       jmax   = ui[prow + 1];
3005       ncols  = jmax - jmin;
3006       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3007       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3008       nzk += nlnk;
3009 
3010       /* update il and jl for prow */
3011       if (jmin < jmax) {
3012         il[prow] = jmin;
3013         j        = *uj_ptr;
3014         jl[prow] = jl[j];
3015         jl[j]    = prow;
3016       }
3017       prow = nextprow;
3018     }
3019 
3020     /* if free space is not available, make more free space */
3021     if (current_space->local_remaining < nzk) {
3022       i = am - k + 1;                     /* num of unfactored rows */
3023       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3024       PetscCall(PetscFreeSpaceGet(i, &current_space));
3025       reallocs++;
3026     }
3027 
3028     /* copy data into free space, then initialize lnk */
3029     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3030 
3031     /* add the k-th row into il and jl */
3032     if (nzk - 1 > 0) {
3033       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3034       jl[k] = jl[i];
3035       jl[i] = k;
3036       il[k] = ui[k] + 1;
3037     }
3038     ui_ptr[k] = current_space->array;
3039 
3040     current_space->array += nzk;
3041     current_space->local_used += nzk;
3042     current_space->local_remaining -= nzk;
3043 
3044     ui[k + 1] = ui[k] + nzk;
3045   }
3046 
3047   #if defined(PETSC_USE_INFO)
3048   if (ai[am] != 0) {
3049     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3050     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3051     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3052     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3053   } else {
3054     PetscCall(PetscInfo(A, "Empty matrix\n"));
3055   }
3056   #endif
3057 
3058   PetscCall(ISRestoreIndices(perm, &rip));
3059   PetscCall(ISRestoreIndices(iperm, &riip));
3060   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3061 
3062   /* destroy list of free space and other temporary array(s) */
3063   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3064   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3065   PetscCall(PetscLLDestroy(lnk, lnkbt));
3066 
3067   /* put together the new matrix in MATSEQSBAIJ format */
3068 
3069   b               = (Mat_SeqSBAIJ *)fact->data;
3070   b->singlemalloc = PETSC_FALSE;
3071   b->free_a       = PETSC_TRUE;
3072   b->free_ij      = PETSC_TRUE;
3073 
3074   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3075 
3076   b->j    = uj;
3077   b->i    = ui;
3078   b->diag = NULL;
3079   b->ilen = NULL;
3080   b->imax = NULL;
3081   b->row  = perm;
3082   b->col  = perm;
3083 
3084   PetscCall(PetscObjectReference((PetscObject)perm));
3085   PetscCall(PetscObjectReference((PetscObject)perm));
3086 
3087   b->icol          = iperm;
3088   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3089 
3090   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3091   b->maxnz = b->nz = ui[am];
3092 
3093   fact->info.factor_mallocs   = reallocs;
3094   fact->info.fill_ratio_given = fill;
3095   if (ai[am] != 0) {
3096     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3097   } else {
3098     fact->info.fill_ratio_needed = 0.0;
3099   }
3100   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3101   PetscFunctionReturn(PETSC_SUCCESS);
3102 }
3103 #endif
3104 
3105 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3106 {
3107   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3108   PetscInt           n  = A->rmap->n;
3109   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3110   PetscScalar       *x, sum;
3111   const PetscScalar *b;
3112   const MatScalar   *aa = a->a, *v;
3113   PetscInt           i, nz;
3114 
3115   PetscFunctionBegin;
3116   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3117 
3118   PetscCall(VecGetArrayRead(bb, &b));
3119   PetscCall(VecGetArrayWrite(xx, &x));
3120 
3121   /* forward solve the lower triangular */
3122   x[0] = b[0];
3123   v    = aa;
3124   vi   = aj;
3125   for (i = 1; i < n; i++) {
3126     nz  = ai[i + 1] - ai[i];
3127     sum = b[i];
3128     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3129     v += nz;
3130     vi += nz;
3131     x[i] = sum;
3132   }
3133 
3134   /* backward solve the upper triangular */
3135   for (i = n - 1; i >= 0; i--) {
3136     v   = aa + adiag[i + 1] + 1;
3137     vi  = aj + adiag[i + 1] + 1;
3138     nz  = adiag[i] - adiag[i + 1] - 1;
3139     sum = x[i];
3140     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3141     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3142   }
3143 
3144   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3145   PetscCall(VecRestoreArrayRead(bb, &b));
3146   PetscCall(VecRestoreArrayWrite(xx, &x));
3147   PetscFunctionReturn(PETSC_SUCCESS);
3148 }
3149 
3150 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3151 {
3152   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3153   IS                 iscol = a->col, isrow = a->row;
3154   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3155   const PetscInt    *rout, *cout, *r, *c;
3156   PetscScalar       *x, *tmp, sum;
3157   const PetscScalar *b;
3158   const MatScalar   *aa = a->a, *v;
3159 
3160   PetscFunctionBegin;
3161   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3162 
3163   PetscCall(VecGetArrayRead(bb, &b));
3164   PetscCall(VecGetArrayWrite(xx, &x));
3165   tmp = a->solve_work;
3166 
3167   PetscCall(ISGetIndices(isrow, &rout));
3168   r = rout;
3169   PetscCall(ISGetIndices(iscol, &cout));
3170   c = cout;
3171 
3172   /* forward solve the lower triangular */
3173   tmp[0] = b[r[0]];
3174   v      = aa;
3175   vi     = aj;
3176   for (i = 1; i < n; i++) {
3177     nz  = ai[i + 1] - ai[i];
3178     sum = b[r[i]];
3179     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3180     tmp[i] = sum;
3181     v += nz;
3182     vi += nz;
3183   }
3184 
3185   /* backward solve the upper triangular */
3186   for (i = n - 1; i >= 0; i--) {
3187     v   = aa + adiag[i + 1] + 1;
3188     vi  = aj + adiag[i + 1] + 1;
3189     nz  = adiag[i] - adiag[i + 1] - 1;
3190     sum = tmp[i];
3191     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3192     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3193   }
3194 
3195   PetscCall(ISRestoreIndices(isrow, &rout));
3196   PetscCall(ISRestoreIndices(iscol, &cout));
3197   PetscCall(VecRestoreArrayRead(bb, &b));
3198   PetscCall(VecRestoreArrayWrite(xx, &x));
3199   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3200   PetscFunctionReturn(PETSC_SUCCESS);
3201 }
3202 
3203 #if 0
3204 // unused
3205 /*
3206     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3207 */
3208 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3209 {
3210   Mat             B = *fact;
3211   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3212   IS              isicol;
3213   const PetscInt *r, *ic;
3214   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3215   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3216   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3217   PetscInt        nlnk, *lnk;
3218   PetscBT         lnkbt;
3219   PetscBool       row_identity, icol_identity;
3220   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3221   const PetscInt *ics;
3222   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3223   PetscReal       dt = info->dt, shift = info->shiftamount;
3224   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3225   PetscBool       missing;
3226 
3227   PetscFunctionBegin;
3228   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3229   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3230 
3231   /* symbolic factorization, can be reused */
3232   PetscCall(MatMissingDiagonal(A, &missing, &i));
3233   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3234   adiag = a->diag;
3235 
3236   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3237 
3238   /* bdiag is location of diagonal in factor */
3239   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3240   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3241 
3242   /* allocate row pointers bi */
3243   PetscCall(PetscMalloc1(2 * n + 2, &bi));
3244 
3245   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3246   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3247   nnz_max = ai[n] + 2 * n * dtcount + 2;
3248 
3249   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3250   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3251 
3252   /* put together the new matrix */
3253   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3254   b = (Mat_SeqAIJ *)B->data;
3255 
3256   b->free_a       = PETSC_TRUE;
3257   b->free_ij      = PETSC_TRUE;
3258   b->singlemalloc = PETSC_FALSE;
3259 
3260   b->a    = ba;
3261   b->j    = bj;
3262   b->i    = bi;
3263   b->diag = bdiag;
3264   b->ilen = NULL;
3265   b->imax = NULL;
3266   b->row  = isrow;
3267   b->col  = iscol;
3268   PetscCall(PetscObjectReference((PetscObject)isrow));
3269   PetscCall(PetscObjectReference((PetscObject)iscol));
3270   b->icol = isicol;
3271 
3272   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3273   b->maxnz = nnz_max;
3274 
3275   B->factortype            = MAT_FACTOR_ILUDT;
3276   B->info.factor_mallocs   = 0;
3277   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3278   /*  end of symbolic factorization */
3279 
3280   PetscCall(ISGetIndices(isrow, &r));
3281   PetscCall(ISGetIndices(isicol, &ic));
3282   ics = ic;
3283 
3284   /* linked list for storing column indices of the active row */
3285   nlnk = n + 1;
3286   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3287 
3288   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3289   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3290   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3291   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3292   PetscCall(PetscArrayzero(rtmp, n));
3293 
3294   bi[0]         = 0;
3295   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3296   bdiag_rev[n]  = bdiag[0];
3297   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3298   for (i = 0; i < n; i++) {
3299     /* copy initial fill into linked list */
3300     nzi = ai[r[i] + 1] - ai[r[i]];
3301     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3302     nzi_al = adiag[r[i]] - ai[r[i]];
3303     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3304     ajtmp  = aj + ai[r[i]];
3305     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3306 
3307     /* load in initial (unfactored row) */
3308     aatmp = a->a + ai[r[i]];
3309     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3310 
3311     /* add pivot rows into linked list */
3312     row = lnk[n];
3313     while (row < i) {
3314       nzi_bl = bi[row + 1] - bi[row] + 1;
3315       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3316       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3317       nzi += nlnk;
3318       row = lnk[row];
3319     }
3320 
3321     /* copy data from lnk into jtmp, then initialize lnk */
3322     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3323 
3324     /* numerical factorization */
3325     bjtmp = jtmp;
3326     row   = *bjtmp++; /* 1st pivot row */
3327     while (row < i) {
3328       pc         = rtmp + row;
3329       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3330       multiplier = (*pc) * (*pv);
3331       *pc        = multiplier;
3332       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3333         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3334         pv = ba + bdiag[row + 1] + 1;
3335         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3336         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3337         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3338       }
3339       row = *bjtmp++;
3340     }
3341 
3342     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3343     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3344     nzi_bl   = 0;
3345     j        = 0;
3346     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3347       vtmp[j]       = rtmp[jtmp[j]];
3348       rtmp[jtmp[j]] = 0.0;
3349       nzi_bl++;
3350       j++;
3351     }
3352     nzi_bu = nzi - nzi_bl - 1;
3353     while (j < nzi) {
3354       vtmp[j]       = rtmp[jtmp[j]];
3355       rtmp[jtmp[j]] = 0.0;
3356       j++;
3357     }
3358 
3359     bjtmp = bj + bi[i];
3360     batmp = ba + bi[i];
3361     /* apply level dropping rule to L part */
3362     ncut = nzi_al + dtcount;
3363     if (ncut < nzi_bl) {
3364       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3365       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3366     } else {
3367       ncut = nzi_bl;
3368     }
3369     for (j = 0; j < ncut; j++) {
3370       bjtmp[j] = jtmp[j];
3371       batmp[j] = vtmp[j];
3372     }
3373     bi[i + 1] = bi[i] + ncut;
3374     nzi       = ncut + 1;
3375 
3376     /* apply level dropping rule to U part */
3377     ncut = nzi_au + dtcount;
3378     if (ncut < nzi_bu) {
3379       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3380       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3381     } else {
3382       ncut = nzi_bu;
3383     }
3384     nzi += ncut;
3385 
3386     /* mark bdiagonal */
3387     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3388     bdiag_rev[n - i - 1] = bdiag[i + 1];
3389     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3390     bjtmp                = bj + bdiag[i];
3391     batmp                = ba + bdiag[i];
3392     *bjtmp               = i;
3393     *batmp               = diag_tmp; /* rtmp[i]; */
3394     if (*batmp == 0.0) *batmp = dt + shift;
3395     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3396 
3397     bjtmp = bj + bdiag[i + 1] + 1;
3398     batmp = ba + bdiag[i + 1] + 1;
3399     for (k = 0; k < ncut; k++) {
3400       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3401       batmp[k] = vtmp[nzi_bl + 1 + k];
3402     }
3403 
3404     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3405   }              /* for (i=0; i<n; i++) */
3406   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3407 
3408   PetscCall(ISRestoreIndices(isrow, &r));
3409   PetscCall(ISRestoreIndices(isicol, &ic));
3410 
3411   PetscCall(PetscLLDestroy(lnk, lnkbt));
3412   PetscCall(PetscFree2(im, jtmp));
3413   PetscCall(PetscFree2(rtmp, vtmp));
3414   PetscCall(PetscFree(bdiag_rev));
3415 
3416   PetscCall(PetscLogFlops(B->cmap->n));
3417   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3418 
3419   PetscCall(ISIdentity(isrow, &row_identity));
3420   PetscCall(ISIdentity(isicol, &icol_identity));
3421   if (row_identity && icol_identity) {
3422     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3423   } else {
3424     B->ops->solve = MatSolve_SeqAIJ;
3425   }
3426 
3427   B->ops->solveadd          = NULL;
3428   B->ops->solvetranspose    = NULL;
3429   B->ops->solvetransposeadd = NULL;
3430   B->ops->matsolve          = NULL;
3431   B->assembled              = PETSC_TRUE;
3432   B->preallocated           = PETSC_TRUE;
3433   PetscFunctionReturn(PETSC_SUCCESS);
3434 }
3435 
3436 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3437 /*
3438     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3439 */
3440 
3441 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3442 {
3443   PetscFunctionBegin;
3444   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3445   PetscFunctionReturn(PETSC_SUCCESS);
3446 }
3447 
3448 /*
3449    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3450    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3451 */
3452 /*
3453     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3454 */
3455 
3456 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3457 {
3458   Mat             C = fact;
3459   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3460   IS              isrow = b->row, isicol = b->icol;
3461   const PetscInt *r, *ic, *ics;
3462   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3463   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3464   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3465   PetscReal       dt = info->dt, shift = info->shiftamount;
3466   PetscBool       row_identity, col_identity;
3467 
3468   PetscFunctionBegin;
3469   PetscCall(ISGetIndices(isrow, &r));
3470   PetscCall(ISGetIndices(isicol, &ic));
3471   PetscCall(PetscMalloc1(n + 1, &rtmp));
3472   ics = ic;
3473 
3474   for (i = 0; i < n; i++) {
3475     /* initialize rtmp array */
3476     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3477     bjtmp = bj + bi[i];
3478     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3479     rtmp[i] = 0.0;
3480     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3481     bjtmp   = bj + bdiag[i + 1] + 1;
3482     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3483 
3484     /* load in initial unfactored row of A */
3485     nz    = ai[r[i] + 1] - ai[r[i]];
3486     ajtmp = aj + ai[r[i]];
3487     v     = aa + ai[r[i]];
3488     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3489 
3490     /* numerical factorization */
3491     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3492     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3493     k     = 0;
3494     while (k < nzl) {
3495       row        = *bjtmp++;
3496       pc         = rtmp + row;
3497       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3498       multiplier = (*pc) * (*pv);
3499       *pc        = multiplier;
3500       if (PetscAbsScalar(multiplier) > dt) {
3501         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3502         pv = b->a + bdiag[row + 1] + 1;
3503         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3504         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3505         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3506       }
3507       k++;
3508     }
3509 
3510     /* finished row so stick it into b->a */
3511     /* L-part */
3512     pv  = b->a + bi[i];
3513     pj  = bj + bi[i];
3514     nzl = bi[i + 1] - bi[i];
3515     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3516 
3517     /* diagonal: invert diagonal entries for simpler triangular solves */
3518     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3519     b->a[bdiag[i]] = 1.0 / rtmp[i];
3520 
3521     /* U-part */
3522     pv  = b->a + bdiag[i + 1] + 1;
3523     pj  = bj + bdiag[i + 1] + 1;
3524     nzu = bdiag[i] - bdiag[i + 1] - 1;
3525     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3526   }
3527 
3528   PetscCall(PetscFree(rtmp));
3529   PetscCall(ISRestoreIndices(isicol, &ic));
3530   PetscCall(ISRestoreIndices(isrow, &r));
3531 
3532   PetscCall(ISIdentity(isrow, &row_identity));
3533   PetscCall(ISIdentity(isicol, &col_identity));
3534   if (row_identity && col_identity) {
3535     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3536   } else {
3537     C->ops->solve = MatSolve_SeqAIJ;
3538   }
3539   C->ops->solveadd          = NULL;
3540   C->ops->solvetranspose    = NULL;
3541   C->ops->solvetransposeadd = NULL;
3542   C->ops->matsolve          = NULL;
3543   C->assembled              = PETSC_TRUE;
3544   C->preallocated           = PETSC_TRUE;
3545 
3546   PetscCall(PetscLogFlops(C->cmap->n));
3547   PetscFunctionReturn(PETSC_SUCCESS);
3548 }
3549 #endif
3550