xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
6 /*
7       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8 
9       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10 */
11 #if 0
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal          best, past = 0, future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i = 0; i < n; i++) {
25     future = 0.0;
26     for (j = ai[i]; j < ai[i + 1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else past = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past / future > best) {
32       best    = past / future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n, &done));
38   PetscCall(PetscArrayzero(done, n));
39   PetscCall(PetscMalloc1(n, &order));
40   order[0] = current;
41   for (i = 0; i < n - 1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j = ai[current]; j < ai[current + 1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k = ai[jj]; k < ai[jj + 1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past / future > best) {
57         best       = past / future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k = 0; k < n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j = ai[k]; j < ai[k + 1]; j++) {
68           kk = aj[j];
69           if (done[kk]) past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past / future > best) {
74           best       = past / future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80     current      = newcurrent;
81     order[i + 1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #endif
91 
92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93 {
94   PetscFunctionBegin;
95   *type = MATSOLVERPETSC;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100 {
101   PetscInt n = A->rmap->n;
102 
103   PetscFunctionBegin;
104 #if defined(PETSC_USE_COMPLEX)
105   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107     *B = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110 #endif
111 
112   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113   PetscCall(MatSetSizes(*B, n, n, n, n));
114   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115     PetscCall(MatSetType(*B, MATSEQAIJ));
116 
117     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
119 
120     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
127 
128     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133   (*B)->factortype = ftype;
134 
135   PetscCall(PetscFree((*B)->solvertype));
136   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137   (*B)->canuseordering = PETSC_TRUE;
138   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 #if 0
143 // currently unused
144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145 {
146   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147   IS                 isicol;
148   const PetscInt    *r, *ic;
149   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150   PetscInt          *bi, *bj, *ajtmp;
151   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152   PetscReal          f;
153   PetscInt           nlnk, *lnk, k, **bi_ptr;
154   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155   PetscBT            lnkbt;
156   PetscBool          missing;
157 
158   PetscFunctionBegin;
159   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160   PetscCall(MatMissingDiagonal(A, &missing, &i));
161   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
162 
163   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164   PetscCall(ISGetIndices(isrow, &r));
165   PetscCall(ISGetIndices(isicol, &ic));
166 
167   /* get new row pointers */
168   PetscCall(PetscMalloc1(n + 1, &bi));
169   bi[0] = 0;
170 
171   /* bdiag is location of diagonal in factor */
172   PetscCall(PetscMalloc1(n + 1, &bdiag));
173   bdiag[0] = 0;
174 
175   /* linked list for storing column indices of the active row */
176   nlnk = n + 1;
177   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178 
179   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
180 
181   /* initial FreeSpace size is f*(ai[n]+1) */
182   f = info->fill;
183   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
184   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
185   current_space = free_space;
186 
187   for (i = 0; i < n; i++) {
188     /* copy previous fill into linked list */
189     nzi   = 0;
190     nnz   = ai[r[i] + 1] - ai[r[i]];
191     ajtmp = aj + ai[r[i]];
192     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
193     nzi += nlnk;
194 
195     /* add pivot rows into linked list */
196     row = lnk[n];
197     while (row < i) {
198       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
199       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
200       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
201       nzi += nlnk;
202       row = lnk[row];
203     }
204     bi[i + 1] = bi[i] + nzi;
205     im[i]     = nzi;
206 
207     /* mark bdiag */
208     nzbd = 0;
209     nnz  = nzi;
210     k    = lnk[n];
211     while (nnz-- && k < i) {
212       nzbd++;
213       k = lnk[k];
214     }
215     bdiag[i] = bi[i] + nzbd;
216 
217     /* if free space is not available, make more free space */
218     if (current_space->local_remaining < nzi) {
219       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
220       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
221       reallocs++;
222     }
223 
224     /* copy data into free space, then initialize lnk */
225     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
226 
227     bi_ptr[i] = current_space->array;
228     current_space->array += nzi;
229     current_space->local_used += nzi;
230     current_space->local_remaining -= nzi;
231   }
232   #if defined(PETSC_USE_INFO)
233   if (ai[n] != 0) {
234     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
235     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
236     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
237     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
238     PetscCall(PetscInfo(A, "for best performance.\n"));
239   } else {
240     PetscCall(PetscInfo(A, "Empty matrix\n"));
241   }
242   #endif
243 
244   PetscCall(ISRestoreIndices(isrow, &r));
245   PetscCall(ISRestoreIndices(isicol, &ic));
246 
247   /* destroy list of free space and other temporary array(s) */
248   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
249   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
250   PetscCall(PetscLLDestroy(lnk, lnkbt));
251   PetscCall(PetscFree2(bi_ptr, im));
252 
253   /* put together the new matrix */
254   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
255   b = (Mat_SeqAIJ *)(B)->data;
256 
257   b->free_a       = PETSC_TRUE;
258   b->free_ij      = PETSC_TRUE;
259   b->singlemalloc = PETSC_FALSE;
260 
261   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
262   b->j    = bj;
263   b->i    = bi;
264   b->diag = bdiag;
265   b->ilen = NULL;
266   b->imax = NULL;
267   b->row  = isrow;
268   b->col  = iscol;
269   PetscCall(PetscObjectReference((PetscObject)isrow));
270   PetscCall(PetscObjectReference((PetscObject)iscol));
271   b->icol = isicol;
272   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
273 
274   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
275   b->maxnz = b->nz = bi[n];
276 
277   (B)->factortype            = MAT_FACTOR_LU;
278   (B)->info.factor_mallocs   = reallocs;
279   (B)->info.fill_ratio_given = f;
280 
281   if (ai[n]) {
282     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
283   } else {
284     (B)->info.fill_ratio_needed = 0.0;
285   }
286   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
287   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
288   PetscFunctionReturn(PETSC_SUCCESS);
289 }
290 #endif
291 
292 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
293 {
294   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
295   IS                 isicol;
296   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
297   PetscInt           i, n = A->rmap->n;
298   PetscInt          *bi, *bj;
299   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
300   PetscReal          f;
301   PetscInt           nlnk, *lnk, k, **bi_ptr;
302   PetscFreeSpaceList free_space = NULL, current_space = NULL;
303   PetscBT            lnkbt;
304   PetscBool          missing;
305 
306   PetscFunctionBegin;
307   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
308   PetscCall(MatMissingDiagonal(A, &missing, &i));
309   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
310 
311   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
312   PetscCall(ISGetIndices(isrow, &r));
313   PetscCall(ISGetIndices(isicol, &ic));
314 
315   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316   PetscCall(PetscMalloc1(n + 1, &bi));
317   PetscCall(PetscMalloc1(n + 1, &bdiag));
318   bi[0] = bdiag[0] = 0;
319 
320   /* linked list for storing column indices of the active row */
321   nlnk = n + 1;
322   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
323 
324   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
325 
326   /* initial FreeSpace size is f*(ai[n]+1) */
327   f = info->fill;
328   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
329   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
330   current_space = free_space;
331 
332   for (i = 0; i < n; i++) {
333     /* copy previous fill into linked list */
334     nzi   = 0;
335     nnz   = ai[r[i] + 1] - ai[r[i]];
336     ajtmp = aj + ai[r[i]];
337     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
338     nzi += nlnk;
339 
340     /* add pivot rows into linked list */
341     row = lnk[n];
342     while (row < i) {
343       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
344       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
346       nzi += nlnk;
347       row = lnk[row];
348     }
349     bi[i + 1] = bi[i] + nzi;
350     im[i]     = nzi;
351 
352     /* mark bdiag */
353     nzbd = 0;
354     nnz  = nzi;
355     k    = lnk[n];
356     while (nnz-- && k < i) {
357       nzbd++;
358       k = lnk[k];
359     }
360     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
361 
362     /* if free space is not available, make more free space */
363     if (current_space->local_remaining < nzi) {
364       /* estimated additional space needed */
365       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
366       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
367       reallocs++;
368     }
369 
370     /* copy data into free space, then initialize lnk */
371     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
372 
373     bi_ptr[i] = current_space->array;
374     current_space->array += nzi;
375     current_space->local_used += nzi;
376     current_space->local_remaining -= nzi;
377   }
378 
379   PetscCall(ISRestoreIndices(isrow, &r));
380   PetscCall(ISRestoreIndices(isicol, &ic));
381 
382   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
383   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
384   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
385   PetscCall(PetscLLDestroy(lnk, lnkbt));
386   PetscCall(PetscFree2(bi_ptr, im));
387 
388   /* put together the new matrix */
389   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
390   b = (Mat_SeqAIJ *)(B)->data;
391 
392   b->free_a       = PETSC_TRUE;
393   b->free_ij      = PETSC_TRUE;
394   b->singlemalloc = PETSC_FALSE;
395 
396   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
397 
398   b->j    = bj;
399   b->i    = bi;
400   b->diag = bdiag;
401   b->ilen = NULL;
402   b->imax = NULL;
403   b->row  = isrow;
404   b->col  = iscol;
405   PetscCall(PetscObjectReference((PetscObject)isrow));
406   PetscCall(PetscObjectReference((PetscObject)iscol));
407   b->icol = isicol;
408   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
409 
410   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
411   b->maxnz = b->nz = bdiag[0] + 1;
412 
413   B->factortype            = MAT_FACTOR_LU;
414   B->info.factor_mallocs   = reallocs;
415   B->info.fill_ratio_given = f;
416 
417   if (ai[n]) {
418     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
419   } else {
420     B->info.fill_ratio_needed = 0.0;
421   }
422 #if defined(PETSC_USE_INFO)
423   if (ai[n] != 0) {
424     PetscReal af = B->info.fill_ratio_needed;
425     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
426     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
427     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
428     PetscCall(PetscInfo(A, "for best performance.\n"));
429   } else {
430     PetscCall(PetscInfo(A, "Empty matrix\n"));
431   }
432 #endif
433   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
434   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
435   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
436   PetscFunctionReturn(PETSC_SUCCESS);
437 }
438 
439 /*
440     Trouble in factorization, should we dump the original matrix?
441 */
442 PetscErrorCode MatFactorDumpMatrix(Mat A)
443 {
444   PetscBool flg = PETSC_FALSE;
445 
446   PetscFunctionBegin;
447   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
448   if (flg) {
449     PetscViewer viewer;
450     char        filename[PETSC_MAX_PATH_LEN];
451 
452     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
453     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
454     PetscCall(MatView(A, viewer));
455     PetscCall(PetscViewerDestroy(&viewer));
456   }
457   PetscFunctionReturn(PETSC_SUCCESS);
458 }
459 
460 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
461 {
462   Mat              C = B;
463   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
464   IS               isrow = b->row, isicol = b->icol;
465   const PetscInt  *r, *ic, *ics;
466   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
467   PetscInt         i, j, k, nz, nzL, row, *pj;
468   const PetscInt  *ajtmp, *bjtmp;
469   MatScalar       *rtmp, *pc, multiplier, *pv;
470   const MatScalar *aa = a->a, *v;
471   PetscBool        row_identity, col_identity;
472   FactorShiftCtx   sctx;
473   const PetscInt  *ddiag;
474   PetscReal        rs;
475   MatScalar        d;
476 
477   PetscFunctionBegin;
478   /* MatPivotSetUp(): initialize shift context sctx */
479   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
480 
481   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
482     ddiag          = a->diag;
483     sctx.shift_top = info->zeropivot;
484     for (i = 0; i < n; i++) {
485       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
486       d  = (aa)[ddiag[i]];
487       rs = -PetscAbsScalar(d) - PetscRealPart(d);
488       v  = aa + ai[i];
489       nz = ai[i + 1] - ai[i];
490       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
491       if (rs > sctx.shift_top) sctx.shift_top = rs;
492     }
493     sctx.shift_top *= 1.1;
494     sctx.nshift_max = 5;
495     sctx.shift_lo   = 0.;
496     sctx.shift_hi   = 1.;
497   }
498 
499   PetscCall(ISGetIndices(isrow, &r));
500   PetscCall(ISGetIndices(isicol, &ic));
501   PetscCall(PetscMalloc1(n + 1, &rtmp));
502   ics = ic;
503 
504   do {
505     sctx.newshift = PETSC_FALSE;
506     for (i = 0; i < n; i++) {
507       /* zero rtmp */
508       /* L part */
509       nz    = bi[i + 1] - bi[i];
510       bjtmp = bj + bi[i];
511       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* U part */
514       nz    = bdiag[i] - bdiag[i + 1];
515       bjtmp = bj + bdiag[i + 1] + 1;
516       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
517 
518       /* load in initial (unfactored row) */
519       nz    = ai[r[i] + 1] - ai[r[i]];
520       ajtmp = aj + ai[r[i]];
521       v     = aa + ai[r[i]];
522       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
523       /* ZeropivotApply() */
524       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
525 
526       /* elimination */
527       bjtmp = bj + bi[i];
528       row   = *bjtmp++;
529       nzL   = bi[i + 1] - bi[i];
530       for (k = 0; k < nzL; k++) {
531         pc = rtmp + row;
532         if (*pc != 0.0) {
533           pv         = b->a + bdiag[row];
534           multiplier = *pc * (*pv);
535           *pc        = multiplier;
536 
537           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
538           pv = b->a + bdiag[row + 1] + 1;
539           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
540 
541           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542           PetscCall(PetscLogFlops(1 + 2.0 * nz));
543         }
544         row = *bjtmp++;
545       }
546 
547       /* finished row so stick it into b->a */
548       rs = 0.0;
549       /* L part */
550       pv = b->a + bi[i];
551       pj = b->j + bi[i];
552       nz = bi[i + 1] - bi[i];
553       for (j = 0; j < nz; j++) {
554         pv[j] = rtmp[pj[j]];
555         rs += PetscAbsScalar(pv[j]);
556       }
557 
558       /* U part */
559       pv = b->a + bdiag[i + 1] + 1;
560       pj = b->j + bdiag[i + 1] + 1;
561       nz = bdiag[i] - bdiag[i + 1] - 1;
562       for (j = 0; j < nz; j++) {
563         pv[j] = rtmp[pj[j]];
564         rs += PetscAbsScalar(pv[j]);
565       }
566 
567       sctx.rs = rs;
568       sctx.pv = rtmp[i];
569       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
570       if (sctx.newshift) break; /* break for-loop */
571       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
572 
573       /* Mark diagonal and invert diagonal for simpler triangular solves */
574       pv  = b->a + bdiag[i];
575       *pv = 1.0 / rtmp[i];
576 
577     } /* endof for (i=0; i<n; i++) { */
578 
579     /* MatPivotRefine() */
580     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
581       /*
582        * if no shift in this attempt & shifting & started shifting & can refine,
583        * then try lower shift
584        */
585       sctx.shift_hi       = sctx.shift_fraction;
586       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
587       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
588       sctx.newshift       = PETSC_TRUE;
589       sctx.nshift++;
590     }
591   } while (sctx.newshift);
592 
593   PetscCall(PetscFree(rtmp));
594   PetscCall(ISRestoreIndices(isicol, &ic));
595   PetscCall(ISRestoreIndices(isrow, &r));
596 
597   PetscCall(ISIdentity(isrow, &row_identity));
598   PetscCall(ISIdentity(isicol, &col_identity));
599   if (b->inode.size) {
600     C->ops->solve = MatSolve_SeqAIJ_Inode;
601   } else if (row_identity && col_identity) {
602     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603   } else {
604     C->ops->solve = MatSolve_SeqAIJ;
605   }
606   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
607   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
608   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
609   C->ops->matsolve          = MatMatSolve_SeqAIJ;
610   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
611   C->assembled              = PETSC_TRUE;
612   C->preallocated           = PETSC_TRUE;
613 
614   PetscCall(PetscLogFlops(C->cmap->n));
615 
616   /* MatShiftView(A,info,&sctx) */
617   if (sctx.nshift) {
618     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
619       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
620     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
621       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
622     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
623       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
624     }
625   }
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
629 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
630 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
631 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
632 
633 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
634 {
635   Mat              C = B;
636   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
637   IS               isrow = b->row, isicol = b->icol;
638   const PetscInt  *r, *ic, *ics;
639   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
640   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
641   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
642   MatScalar       *pv, *rtmp, *pc, multiplier, d;
643   const MatScalar *v, *aa = a->a;
644   PetscReal        rs = 0.0;
645   FactorShiftCtx   sctx;
646   const PetscInt  *ddiag;
647   PetscBool        row_identity, col_identity;
648 
649   PetscFunctionBegin;
650   /* MatPivotSetUp(): initialize shift context sctx */
651   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
652 
653   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
654     ddiag          = a->diag;
655     sctx.shift_top = info->zeropivot;
656     for (i = 0; i < n; i++) {
657       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
658       d  = (aa)[ddiag[i]];
659       rs = -PetscAbsScalar(d) - PetscRealPart(d);
660       v  = aa + ai[i];
661       nz = ai[i + 1] - ai[i];
662       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
663       if (rs > sctx.shift_top) sctx.shift_top = rs;
664     }
665     sctx.shift_top *= 1.1;
666     sctx.nshift_max = 5;
667     sctx.shift_lo   = 0.;
668     sctx.shift_hi   = 1.;
669   }
670 
671   PetscCall(ISGetIndices(isrow, &r));
672   PetscCall(ISGetIndices(isicol, &ic));
673   PetscCall(PetscMalloc1(n + 1, &rtmp));
674   ics = ic;
675 
676   do {
677     sctx.newshift = PETSC_FALSE;
678     for (i = 0; i < n; i++) {
679       nz    = bi[i + 1] - bi[i];
680       bjtmp = bj + bi[i];
681       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
682 
683       /* load in initial (unfactored row) */
684       nz    = ai[r[i] + 1] - ai[r[i]];
685       ajtmp = aj + ai[r[i]];
686       v     = aa + ai[r[i]];
687       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
688       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
689 
690       row = *bjtmp++;
691       while (row < i) {
692         pc = rtmp + row;
693         if (*pc != 0.0) {
694           pv         = b->a + diag_offset[row];
695           pj         = b->j + diag_offset[row] + 1;
696           multiplier = *pc / *pv++;
697           *pc        = multiplier;
698           nz         = bi[row + 1] - diag_offset[row] - 1;
699           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
700           PetscCall(PetscLogFlops(1 + 2.0 * nz));
701         }
702         row = *bjtmp++;
703       }
704       /* finished row so stick it into b->a */
705       pv   = b->a + bi[i];
706       pj   = b->j + bi[i];
707       nz   = bi[i + 1] - bi[i];
708       diag = diag_offset[i] - bi[i];
709       rs   = 0.0;
710       for (j = 0; j < nz; j++) {
711         pv[j] = rtmp[pj[j]];
712         rs += PetscAbsScalar(pv[j]);
713       }
714       rs -= PetscAbsScalar(pv[diag]);
715 
716       sctx.rs = rs;
717       sctx.pv = pv[diag];
718       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
719       if (sctx.newshift) break;
720       pv[diag] = sctx.pv;
721     }
722 
723     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
724       /*
725        * if no shift in this attempt & shifting & started shifting & can refine,
726        * then try lower shift
727        */
728       sctx.shift_hi       = sctx.shift_fraction;
729       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
730       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
731       sctx.newshift       = PETSC_TRUE;
732       sctx.nshift++;
733     }
734   } while (sctx.newshift);
735 
736   /* invert diagonal entries for simpler triangular solves */
737   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
738   PetscCall(PetscFree(rtmp));
739   PetscCall(ISRestoreIndices(isicol, &ic));
740   PetscCall(ISRestoreIndices(isrow, &r));
741 
742   PetscCall(ISIdentity(isrow, &row_identity));
743   PetscCall(ISIdentity(isicol, &col_identity));
744   if (row_identity && col_identity) {
745     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
746   } else {
747     C->ops->solve = MatSolve_SeqAIJ_inplace;
748   }
749   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
750   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
751   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
752   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
753   C->ops->matsolvetranspose = NULL;
754 
755   C->assembled    = PETSC_TRUE;
756   C->preallocated = PETSC_TRUE;
757 
758   PetscCall(PetscLogFlops(C->cmap->n));
759   if (sctx.nshift) {
760     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
761       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
762     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
763       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
764     }
765   }
766   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
767   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
768 
769   PetscCall(MatSeqAIJCheckInode(C));
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
774 
775 /*
776    This routine implements inplace ILU(0) with row or/and column permutations.
777    Input:
778      A - original matrix
779    Output;
780      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
781          a->j (col index) is permuted by the inverse of colperm, then sorted
782          a->a reordered accordingly with a->j
783          a->diag (ptr to diagonal elements) is updated.
784 */
785 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
786 {
787   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
788   IS               isrow = a->row, isicol = a->icol;
789   const PetscInt  *r, *ic, *ics;
790   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
791   PetscInt        *ajtmp, nz, row;
792   PetscInt        *diag = a->diag, nbdiag, *pj;
793   PetscScalar     *rtmp, *pc, multiplier, d;
794   MatScalar       *pv, *v;
795   PetscReal        rs;
796   FactorShiftCtx   sctx;
797   const MatScalar *aa = a->a, *vtmp;
798 
799   PetscFunctionBegin;
800   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
801 
802   /* MatPivotSetUp(): initialize shift context sctx */
803   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
804 
805   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
806     const PetscInt *ddiag = a->diag;
807     sctx.shift_top        = info->zeropivot;
808     for (i = 0; i < n; i++) {
809       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
810       d    = (aa)[ddiag[i]];
811       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
812       vtmp = aa + ai[i];
813       nz   = ai[i + 1] - ai[i];
814       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
815       if (rs > sctx.shift_top) sctx.shift_top = rs;
816     }
817     sctx.shift_top *= 1.1;
818     sctx.nshift_max = 5;
819     sctx.shift_lo   = 0.;
820     sctx.shift_hi   = 1.;
821   }
822 
823   PetscCall(ISGetIndices(isrow, &r));
824   PetscCall(ISGetIndices(isicol, &ic));
825   PetscCall(PetscMalloc1(n + 1, &rtmp));
826   PetscCall(PetscArrayzero(rtmp, n + 1));
827   ics = ic;
828 
829 #if defined(MV)
830   sctx.shift_top      = 0.;
831   sctx.nshift_max     = 0;
832   sctx.shift_lo       = 0.;
833   sctx.shift_hi       = 0.;
834   sctx.shift_fraction = 0.;
835 
836   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
837     sctx.shift_top = 0.;
838     for (i = 0; i < n; i++) {
839       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
840       d  = (a->a)[diag[i]];
841       rs = -PetscAbsScalar(d) - PetscRealPart(d);
842       v  = a->a + ai[i];
843       nz = ai[i + 1] - ai[i];
844       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
845       if (rs > sctx.shift_top) sctx.shift_top = rs;
846     }
847     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
848     sctx.shift_top *= 1.1;
849     sctx.nshift_max = 5;
850     sctx.shift_lo   = 0.;
851     sctx.shift_hi   = 1.;
852   }
853 
854   sctx.shift_amount = 0.;
855   sctx.nshift       = 0;
856 #endif
857 
858   do {
859     sctx.newshift = PETSC_FALSE;
860     for (i = 0; i < n; i++) {
861       /* load in initial unfactored row */
862       nz    = ai[r[i] + 1] - ai[r[i]];
863       ajtmp = aj + ai[r[i]];
864       v     = a->a + ai[r[i]];
865       /* sort permuted ajtmp and values v accordingly */
866       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
867       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
868 
869       diag[r[i]] = ai[r[i]];
870       for (j = 0; j < nz; j++) {
871         rtmp[ajtmp[j]] = v[j];
872         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
873       }
874       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
875 
876       row = *ajtmp++;
877       while (row < i) {
878         pc = rtmp + row;
879         if (*pc != 0.0) {
880           pv = a->a + diag[r[row]];
881           pj = aj + diag[r[row]] + 1;
882 
883           multiplier = *pc / *pv++;
884           *pc        = multiplier;
885           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
886           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
887           PetscCall(PetscLogFlops(1 + 2.0 * nz));
888         }
889         row = *ajtmp++;
890       }
891       /* finished row so overwrite it onto a->a */
892       pv     = a->a + ai[r[i]];
893       pj     = aj + ai[r[i]];
894       nz     = ai[r[i] + 1] - ai[r[i]];
895       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
896 
897       rs = 0.0;
898       for (j = 0; j < nz; j++) {
899         pv[j] = rtmp[pj[j]];
900         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
901       }
902 
903       sctx.rs = rs;
904       sctx.pv = pv[nbdiag];
905       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
906       if (sctx.newshift) break;
907       pv[nbdiag] = sctx.pv;
908     }
909 
910     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
911       /*
912        * if no shift in this attempt & shifting & started shifting & can refine,
913        * then try lower shift
914        */
915       sctx.shift_hi       = sctx.shift_fraction;
916       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
917       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
918       sctx.newshift       = PETSC_TRUE;
919       sctx.nshift++;
920     }
921   } while (sctx.newshift);
922 
923   /* invert diagonal entries for simpler triangular solves */
924   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
925 
926   PetscCall(PetscFree(rtmp));
927   PetscCall(ISRestoreIndices(isicol, &ic));
928   PetscCall(ISRestoreIndices(isrow, &r));
929 
930   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
931   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
932   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
933   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
934 
935   A->assembled    = PETSC_TRUE;
936   A->preallocated = PETSC_TRUE;
937 
938   PetscCall(PetscLogFlops(A->cmap->n));
939   if (sctx.nshift) {
940     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
941       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
942     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
943       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
944     }
945   }
946   PetscFunctionReturn(PETSC_SUCCESS);
947 }
948 
949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
950 {
951   Mat C;
952 
953   PetscFunctionBegin;
954   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
955   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
956   PetscCall(MatLUFactorNumeric(C, A, info));
957 
958   A->ops->solve          = C->ops->solve;
959   A->ops->solvetranspose = C->ops->solvetranspose;
960 
961   PetscCall(MatHeaderMerge(A, &C));
962   PetscFunctionReturn(PETSC_SUCCESS);
963 }
964 
965 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
966 {
967   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
968   IS                 iscol = a->col, isrow = a->row;
969   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
970   PetscInt           nz;
971   const PetscInt    *rout, *cout, *r, *c;
972   PetscScalar       *x, *tmp, *tmps, sum;
973   const PetscScalar *b;
974   const MatScalar   *aa = a->a, *v;
975 
976   PetscFunctionBegin;
977   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
978 
979   PetscCall(VecGetArrayRead(bb, &b));
980   PetscCall(VecGetArrayWrite(xx, &x));
981   tmp = a->solve_work;
982 
983   PetscCall(ISGetIndices(isrow, &rout));
984   r = rout;
985   PetscCall(ISGetIndices(iscol, &cout));
986   c = cout + (n - 1);
987 
988   /* forward solve the lower triangular */
989   tmp[0] = b[*r++];
990   tmps   = tmp;
991   for (i = 1; i < n; i++) {
992     v   = aa + ai[i];
993     vi  = aj + ai[i];
994     nz  = a->diag[i] - ai[i];
995     sum = b[*r++];
996     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
997     tmp[i] = sum;
998   }
999 
1000   /* backward solve the upper triangular */
1001   for (i = n - 1; i >= 0; i--) {
1002     v   = aa + a->diag[i] + 1;
1003     vi  = aj + a->diag[i] + 1;
1004     nz  = ai[i + 1] - a->diag[i] - 1;
1005     sum = tmp[i];
1006     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1007     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1008   }
1009 
1010   PetscCall(ISRestoreIndices(isrow, &rout));
1011   PetscCall(ISRestoreIndices(iscol, &cout));
1012   PetscCall(VecRestoreArrayRead(bb, &b));
1013   PetscCall(VecRestoreArrayWrite(xx, &x));
1014   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1015   PetscFunctionReturn(PETSC_SUCCESS);
1016 }
1017 
1018 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1019 {
1020   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1021   IS                 iscol = a->col, isrow = a->row;
1022   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1023   PetscInt           nz, neq, ldb, ldx;
1024   const PetscInt    *rout, *cout, *r, *c;
1025   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1026   const PetscScalar *b, *aa  = a->a, *v;
1027   PetscBool          isdense;
1028 
1029   PetscFunctionBegin;
1030   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1031   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1032   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1033   if (X != B) {
1034     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1035     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1036   }
1037   PetscCall(MatDenseGetArrayRead(B, &b));
1038   PetscCall(MatDenseGetLDA(B, &ldb));
1039   PetscCall(MatDenseGetArray(X, &x));
1040   PetscCall(MatDenseGetLDA(X, &ldx));
1041   PetscCall(ISGetIndices(isrow, &rout));
1042   r = rout;
1043   PetscCall(ISGetIndices(iscol, &cout));
1044   c = cout;
1045   for (neq = 0; neq < B->cmap->n; neq++) {
1046     /* forward solve the lower triangular */
1047     tmp[0] = b[r[0]];
1048     tmps   = tmp;
1049     for (i = 1; i < n; i++) {
1050       v   = aa + ai[i];
1051       vi  = aj + ai[i];
1052       nz  = a->diag[i] - ai[i];
1053       sum = b[r[i]];
1054       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1055       tmp[i] = sum;
1056     }
1057     /* backward solve the upper triangular */
1058     for (i = n - 1; i >= 0; i--) {
1059       v   = aa + a->diag[i] + 1;
1060       vi  = aj + a->diag[i] + 1;
1061       nz  = ai[i + 1] - a->diag[i] - 1;
1062       sum = tmp[i];
1063       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1064       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1065     }
1066     b += ldb;
1067     x += ldx;
1068   }
1069   PetscCall(ISRestoreIndices(isrow, &rout));
1070   PetscCall(ISRestoreIndices(iscol, &cout));
1071   PetscCall(MatDenseRestoreArrayRead(B, &b));
1072   PetscCall(MatDenseRestoreArray(X, &x));
1073   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1078 {
1079   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1080   IS                 iscol = a->col, isrow = a->row;
1081   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1082   PetscInt           nz, neq, ldb, ldx;
1083   const PetscInt    *rout, *cout, *r, *c;
1084   PetscScalar       *x, *tmp = a->solve_work, sum;
1085   const PetscScalar *b, *aa  = a->a, *v;
1086   PetscBool          isdense;
1087 
1088   PetscFunctionBegin;
1089   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1090   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1091   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1092   if (X != B) {
1093     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1094     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1095   }
1096   PetscCall(MatDenseGetArrayRead(B, &b));
1097   PetscCall(MatDenseGetLDA(B, &ldb));
1098   PetscCall(MatDenseGetArray(X, &x));
1099   PetscCall(MatDenseGetLDA(X, &ldx));
1100   PetscCall(ISGetIndices(isrow, &rout));
1101   r = rout;
1102   PetscCall(ISGetIndices(iscol, &cout));
1103   c = cout;
1104   for (neq = 0; neq < B->cmap->n; neq++) {
1105     /* forward solve the lower triangular */
1106     tmp[0] = b[r[0]];
1107     v      = aa;
1108     vi     = aj;
1109     for (i = 1; i < n; i++) {
1110       nz  = ai[i + 1] - ai[i];
1111       sum = b[r[i]];
1112       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1113       tmp[i] = sum;
1114       v += nz;
1115       vi += nz;
1116     }
1117     /* backward solve the upper triangular */
1118     for (i = n - 1; i >= 0; i--) {
1119       v   = aa + adiag[i + 1] + 1;
1120       vi  = aj + adiag[i + 1] + 1;
1121       nz  = adiag[i] - adiag[i + 1] - 1;
1122       sum = tmp[i];
1123       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1124       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1125     }
1126     b += ldb;
1127     x += ldx;
1128   }
1129   PetscCall(ISRestoreIndices(isrow, &rout));
1130   PetscCall(ISRestoreIndices(iscol, &cout));
1131   PetscCall(MatDenseRestoreArrayRead(B, &b));
1132   PetscCall(MatDenseRestoreArray(X, &x));
1133   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1138 {
1139   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1140   IS                 iscol = a->col, isrow = a->row;
1141   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1142   PetscInt           nz, neq, ldb, ldx;
1143   const PetscInt    *rout, *cout, *r, *c;
1144   PetscScalar       *x, *tmp = a->solve_work, s1;
1145   const PetscScalar *b, *aa  = a->a, *v;
1146   PetscBool          isdense;
1147 
1148   PetscFunctionBegin;
1149   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1150   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1151   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1152   if (X != B) {
1153     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1154     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1155   }
1156   PetscCall(MatDenseGetArrayRead(B, &b));
1157   PetscCall(MatDenseGetLDA(B, &ldb));
1158   PetscCall(MatDenseGetArray(X, &x));
1159   PetscCall(MatDenseGetLDA(X, &ldx));
1160   tmp = a->solve_work;
1161   PetscCall(ISGetIndices(isrow, &rout));
1162   r = rout;
1163   PetscCall(ISGetIndices(iscol, &cout));
1164   c = cout;
1165   for (neq = 0; neq < B->cmap->n; neq++) {
1166     /* copy the b into temp work space according to permutation */
1167     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1168 
1169     /* forward solve the U^T */
1170     for (i = 0; i < n; i++) {
1171       v  = aa + adiag[i + 1] + 1;
1172       vi = aj + adiag[i + 1] + 1;
1173       nz = adiag[i] - adiag[i + 1] - 1;
1174       s1 = tmp[i];
1175       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1176       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1177       tmp[i] = s1;
1178     }
1179 
1180     /* backward solve the L^T */
1181     for (i = n - 1; i >= 0; i--) {
1182       v  = aa + ai[i];
1183       vi = aj + ai[i];
1184       nz = ai[i + 1] - ai[i];
1185       s1 = tmp[i];
1186       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1187     }
1188 
1189     /* copy tmp into x according to permutation */
1190     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1191     b += ldb;
1192     x += ldx;
1193   }
1194   PetscCall(ISRestoreIndices(isrow, &rout));
1195   PetscCall(ISRestoreIndices(iscol, &cout));
1196   PetscCall(MatDenseRestoreArrayRead(B, &b));
1197   PetscCall(MatDenseRestoreArray(X, &x));
1198   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1199   PetscFunctionReturn(PETSC_SUCCESS);
1200 }
1201 
1202 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1203 {
1204   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1205   IS                 iscol = a->col, isrow = a->row;
1206   const PetscInt    *r, *c, *rout, *cout;
1207   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1208   PetscInt           nz, row;
1209   PetscScalar       *x, *tmp, *tmps, sum;
1210   const PetscScalar *b;
1211   const MatScalar   *aa = a->a, *v;
1212 
1213   PetscFunctionBegin;
1214   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1215 
1216   PetscCall(VecGetArrayRead(bb, &b));
1217   PetscCall(VecGetArrayWrite(xx, &x));
1218   tmp = a->solve_work;
1219 
1220   PetscCall(ISGetIndices(isrow, &rout));
1221   r = rout;
1222   PetscCall(ISGetIndices(iscol, &cout));
1223   c = cout + (n - 1);
1224 
1225   /* forward solve the lower triangular */
1226   tmp[0] = b[*r++];
1227   tmps   = tmp;
1228   for (row = 1; row < n; row++) {
1229     i   = rout[row]; /* permuted row */
1230     v   = aa + ai[i];
1231     vi  = aj + ai[i];
1232     nz  = a->diag[i] - ai[i];
1233     sum = b[*r++];
1234     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1235     tmp[row] = sum;
1236   }
1237 
1238   /* backward solve the upper triangular */
1239   for (row = n - 1; row >= 0; row--) {
1240     i   = rout[row]; /* permuted row */
1241     v   = aa + a->diag[i] + 1;
1242     vi  = aj + a->diag[i] + 1;
1243     nz  = ai[i + 1] - a->diag[i] - 1;
1244     sum = tmp[row];
1245     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1246     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1247   }
1248 
1249   PetscCall(ISRestoreIndices(isrow, &rout));
1250   PetscCall(ISRestoreIndices(iscol, &cout));
1251   PetscCall(VecRestoreArrayRead(bb, &b));
1252   PetscCall(VecRestoreArrayWrite(xx, &x));
1253   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1254   PetscFunctionReturn(PETSC_SUCCESS);
1255 }
1256 
1257 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1258 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1259 {
1260   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1261   PetscInt           n  = A->rmap->n;
1262   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1263   PetscScalar       *x;
1264   const PetscScalar *b;
1265   const MatScalar   *aa = a->a;
1266 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1267   PetscInt         adiag_i, i, nz, ai_i;
1268   const PetscInt  *vi;
1269   const MatScalar *v;
1270   PetscScalar      sum;
1271 #endif
1272 
1273   PetscFunctionBegin;
1274   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1275 
1276   PetscCall(VecGetArrayRead(bb, &b));
1277   PetscCall(VecGetArrayWrite(xx, &x));
1278 
1279 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1280   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1281 #else
1282   /* forward solve the lower triangular */
1283   x[0] = b[0];
1284   for (i = 1; i < n; i++) {
1285     ai_i = ai[i];
1286     v    = aa + ai_i;
1287     vi   = aj + ai_i;
1288     nz   = adiag[i] - ai_i;
1289     sum  = b[i];
1290     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1291     x[i] = sum;
1292   }
1293 
1294   /* backward solve the upper triangular */
1295   for (i = n - 1; i >= 0; i--) {
1296     adiag_i = adiag[i];
1297     v       = aa + adiag_i + 1;
1298     vi      = aj + adiag_i + 1;
1299     nz      = ai[i + 1] - adiag_i - 1;
1300     sum     = x[i];
1301     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1302     x[i] = sum * aa[adiag_i];
1303   }
1304 #endif
1305   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1306   PetscCall(VecRestoreArrayRead(bb, &b));
1307   PetscCall(VecRestoreArrayWrite(xx, &x));
1308   PetscFunctionReturn(PETSC_SUCCESS);
1309 }
1310 
1311 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1312 {
1313   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1314   IS                 iscol = a->col, isrow = a->row;
1315   PetscInt           i, n                  = A->rmap->n, j;
1316   PetscInt           nz;
1317   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1318   PetscScalar       *x, *tmp, sum;
1319   const PetscScalar *b;
1320   const MatScalar   *aa = a->a, *v;
1321 
1322   PetscFunctionBegin;
1323   if (yy != xx) PetscCall(VecCopy(yy, xx));
1324 
1325   PetscCall(VecGetArrayRead(bb, &b));
1326   PetscCall(VecGetArray(xx, &x));
1327   tmp = a->solve_work;
1328 
1329   PetscCall(ISGetIndices(isrow, &rout));
1330   r = rout;
1331   PetscCall(ISGetIndices(iscol, &cout));
1332   c = cout + (n - 1);
1333 
1334   /* forward solve the lower triangular */
1335   tmp[0] = b[*r++];
1336   for (i = 1; i < n; i++) {
1337     v   = aa + ai[i];
1338     vi  = aj + ai[i];
1339     nz  = a->diag[i] - ai[i];
1340     sum = b[*r++];
1341     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1342     tmp[i] = sum;
1343   }
1344 
1345   /* backward solve the upper triangular */
1346   for (i = n - 1; i >= 0; i--) {
1347     v   = aa + a->diag[i] + 1;
1348     vi  = aj + a->diag[i] + 1;
1349     nz  = ai[i + 1] - a->diag[i] - 1;
1350     sum = tmp[i];
1351     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1352     tmp[i] = sum * aa[a->diag[i]];
1353     x[*c--] += tmp[i];
1354   }
1355 
1356   PetscCall(ISRestoreIndices(isrow, &rout));
1357   PetscCall(ISRestoreIndices(iscol, &cout));
1358   PetscCall(VecRestoreArrayRead(bb, &b));
1359   PetscCall(VecRestoreArray(xx, &x));
1360   PetscCall(PetscLogFlops(2.0 * a->nz));
1361   PetscFunctionReturn(PETSC_SUCCESS);
1362 }
1363 
1364 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1365 {
1366   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1367   IS                 iscol = a->col, isrow = a->row;
1368   PetscInt           i, n                  = A->rmap->n, j;
1369   PetscInt           nz;
1370   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1371   PetscScalar       *x, *tmp, sum;
1372   const PetscScalar *b;
1373   const MatScalar   *aa = a->a, *v;
1374 
1375   PetscFunctionBegin;
1376   if (yy != xx) PetscCall(VecCopy(yy, xx));
1377 
1378   PetscCall(VecGetArrayRead(bb, &b));
1379   PetscCall(VecGetArray(xx, &x));
1380   tmp = a->solve_work;
1381 
1382   PetscCall(ISGetIndices(isrow, &rout));
1383   r = rout;
1384   PetscCall(ISGetIndices(iscol, &cout));
1385   c = cout;
1386 
1387   /* forward solve the lower triangular */
1388   tmp[0] = b[r[0]];
1389   v      = aa;
1390   vi     = aj;
1391   for (i = 1; i < n; i++) {
1392     nz  = ai[i + 1] - ai[i];
1393     sum = b[r[i]];
1394     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1395     tmp[i] = sum;
1396     v += nz;
1397     vi += nz;
1398   }
1399 
1400   /* backward solve the upper triangular */
1401   v  = aa + adiag[n - 1];
1402   vi = aj + adiag[n - 1];
1403   for (i = n - 1; i >= 0; i--) {
1404     nz  = adiag[i] - adiag[i + 1] - 1;
1405     sum = tmp[i];
1406     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1407     tmp[i] = sum * v[nz];
1408     x[c[i]] += tmp[i];
1409     v += nz + 1;
1410     vi += nz + 1;
1411   }
1412 
1413   PetscCall(ISRestoreIndices(isrow, &rout));
1414   PetscCall(ISRestoreIndices(iscol, &cout));
1415   PetscCall(VecRestoreArrayRead(bb, &b));
1416   PetscCall(VecRestoreArray(xx, &x));
1417   PetscCall(PetscLogFlops(2.0 * a->nz));
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
1421 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1422 {
1423   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1424   IS                 iscol = a->col, isrow = a->row;
1425   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1426   PetscInt           i, n = A->rmap->n, j;
1427   PetscInt           nz;
1428   PetscScalar       *x, *tmp, s1;
1429   const MatScalar   *aa = a->a, *v;
1430   const PetscScalar *b;
1431 
1432   PetscFunctionBegin;
1433   PetscCall(VecGetArrayRead(bb, &b));
1434   PetscCall(VecGetArrayWrite(xx, &x));
1435   tmp = a->solve_work;
1436 
1437   PetscCall(ISGetIndices(isrow, &rout));
1438   r = rout;
1439   PetscCall(ISGetIndices(iscol, &cout));
1440   c = cout;
1441 
1442   /* copy the b into temp work space according to permutation */
1443   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1444 
1445   /* forward solve the U^T */
1446   for (i = 0; i < n; i++) {
1447     v  = aa + diag[i];
1448     vi = aj + diag[i] + 1;
1449     nz = ai[i + 1] - diag[i] - 1;
1450     s1 = tmp[i];
1451     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1452     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1453     tmp[i] = s1;
1454   }
1455 
1456   /* backward solve the L^T */
1457   for (i = n - 1; i >= 0; i--) {
1458     v  = aa + diag[i] - 1;
1459     vi = aj + diag[i] - 1;
1460     nz = diag[i] - ai[i];
1461     s1 = tmp[i];
1462     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1463   }
1464 
1465   /* copy tmp into x according to permutation */
1466   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1467 
1468   PetscCall(ISRestoreIndices(isrow, &rout));
1469   PetscCall(ISRestoreIndices(iscol, &cout));
1470   PetscCall(VecRestoreArrayRead(bb, &b));
1471   PetscCall(VecRestoreArrayWrite(xx, &x));
1472 
1473   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1474   PetscFunctionReturn(PETSC_SUCCESS);
1475 }
1476 
1477 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1478 {
1479   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1480   IS                 iscol = a->col, isrow = a->row;
1481   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1482   PetscInt           i, n = A->rmap->n, j;
1483   PetscInt           nz;
1484   PetscScalar       *x, *tmp, s1;
1485   const MatScalar   *aa = a->a, *v;
1486   const PetscScalar *b;
1487 
1488   PetscFunctionBegin;
1489   PetscCall(VecGetArrayRead(bb, &b));
1490   PetscCall(VecGetArrayWrite(xx, &x));
1491   tmp = a->solve_work;
1492 
1493   PetscCall(ISGetIndices(isrow, &rout));
1494   r = rout;
1495   PetscCall(ISGetIndices(iscol, &cout));
1496   c = cout;
1497 
1498   /* copy the b into temp work space according to permutation */
1499   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1500 
1501   /* forward solve the U^T */
1502   for (i = 0; i < n; i++) {
1503     v  = aa + adiag[i + 1] + 1;
1504     vi = aj + adiag[i + 1] + 1;
1505     nz = adiag[i] - adiag[i + 1] - 1;
1506     s1 = tmp[i];
1507     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1508     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1509     tmp[i] = s1;
1510   }
1511 
1512   /* backward solve the L^T */
1513   for (i = n - 1; i >= 0; i--) {
1514     v  = aa + ai[i];
1515     vi = aj + ai[i];
1516     nz = ai[i + 1] - ai[i];
1517     s1 = tmp[i];
1518     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1519   }
1520 
1521   /* copy tmp into x according to permutation */
1522   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1523 
1524   PetscCall(ISRestoreIndices(isrow, &rout));
1525   PetscCall(ISRestoreIndices(iscol, &cout));
1526   PetscCall(VecRestoreArrayRead(bb, &b));
1527   PetscCall(VecRestoreArrayWrite(xx, &x));
1528 
1529   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1530   PetscFunctionReturn(PETSC_SUCCESS);
1531 }
1532 
1533 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1534 {
1535   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1536   IS                 iscol = a->col, isrow = a->row;
1537   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1538   PetscInt           i, n = A->rmap->n, j;
1539   PetscInt           nz;
1540   PetscScalar       *x, *tmp, s1;
1541   const MatScalar   *aa = a->a, *v;
1542   const PetscScalar *b;
1543 
1544   PetscFunctionBegin;
1545   if (zz != xx) PetscCall(VecCopy(zz, xx));
1546   PetscCall(VecGetArrayRead(bb, &b));
1547   PetscCall(VecGetArray(xx, &x));
1548   tmp = a->solve_work;
1549 
1550   PetscCall(ISGetIndices(isrow, &rout));
1551   r = rout;
1552   PetscCall(ISGetIndices(iscol, &cout));
1553   c = cout;
1554 
1555   /* copy the b into temp work space according to permutation */
1556   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1557 
1558   /* forward solve the U^T */
1559   for (i = 0; i < n; i++) {
1560     v  = aa + diag[i];
1561     vi = aj + diag[i] + 1;
1562     nz = ai[i + 1] - diag[i] - 1;
1563     s1 = tmp[i];
1564     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1565     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1566     tmp[i] = s1;
1567   }
1568 
1569   /* backward solve the L^T */
1570   for (i = n - 1; i >= 0; i--) {
1571     v  = aa + diag[i] - 1;
1572     vi = aj + diag[i] - 1;
1573     nz = diag[i] - ai[i];
1574     s1 = tmp[i];
1575     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1576   }
1577 
1578   /* copy tmp into x according to permutation */
1579   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1580 
1581   PetscCall(ISRestoreIndices(isrow, &rout));
1582   PetscCall(ISRestoreIndices(iscol, &cout));
1583   PetscCall(VecRestoreArrayRead(bb, &b));
1584   PetscCall(VecRestoreArray(xx, &x));
1585 
1586   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1591 {
1592   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1593   IS                 iscol = a->col, isrow = a->row;
1594   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1595   PetscInt           i, n = A->rmap->n, j;
1596   PetscInt           nz;
1597   PetscScalar       *x, *tmp, s1;
1598   const MatScalar   *aa = a->a, *v;
1599   const PetscScalar *b;
1600 
1601   PetscFunctionBegin;
1602   if (zz != xx) PetscCall(VecCopy(zz, xx));
1603   PetscCall(VecGetArrayRead(bb, &b));
1604   PetscCall(VecGetArray(xx, &x));
1605   tmp = a->solve_work;
1606 
1607   PetscCall(ISGetIndices(isrow, &rout));
1608   r = rout;
1609   PetscCall(ISGetIndices(iscol, &cout));
1610   c = cout;
1611 
1612   /* copy the b into temp work space according to permutation */
1613   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1614 
1615   /* forward solve the U^T */
1616   for (i = 0; i < n; i++) {
1617     v  = aa + adiag[i + 1] + 1;
1618     vi = aj + adiag[i + 1] + 1;
1619     nz = adiag[i] - adiag[i + 1] - 1;
1620     s1 = tmp[i];
1621     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1622     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1623     tmp[i] = s1;
1624   }
1625 
1626   /* backward solve the L^T */
1627   for (i = n - 1; i >= 0; i--) {
1628     v  = aa + ai[i];
1629     vi = aj + ai[i];
1630     nz = ai[i + 1] - ai[i];
1631     s1 = tmp[i];
1632     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1633   }
1634 
1635   /* copy tmp into x according to permutation */
1636   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1637 
1638   PetscCall(ISRestoreIndices(isrow, &rout));
1639   PetscCall(ISRestoreIndices(iscol, &cout));
1640   PetscCall(VecRestoreArrayRead(bb, &b));
1641   PetscCall(VecRestoreArray(xx, &x));
1642 
1643   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1644   PetscFunctionReturn(PETSC_SUCCESS);
1645 }
1646 
1647 /*
1648    ilu() under revised new data structure.
1649    Factored arrays bj and ba are stored as
1650      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1651 
1652    bi=fact->i is an array of size n+1, in which
1653      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1654      bi[n]:  points to L(n-1,n-1)+1
1655 
1656   bdiag=fact->diag is an array of size n+1,in which
1657      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1658      bdiag[n]: points to entry of U(n-1,0)-1
1659 
1660    U(i,:) contains bdiag[i] as its last entry, i.e.,
1661     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1662 */
1663 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1664 {
1665   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1666   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1667   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1668   IS             isicol;
1669 
1670   PetscFunctionBegin;
1671   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1672   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1673   b = (Mat_SeqAIJ *)(fact)->data;
1674 
1675   /* allocate matrix arrays for new data structure */
1676   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1677 
1678   b->singlemalloc = PETSC_TRUE;
1679   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1680   bdiag = b->diag;
1681 
1682   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1683 
1684   /* set bi and bj with new data structure */
1685   bi = b->i;
1686   bj = b->j;
1687 
1688   /* L part */
1689   bi[0] = 0;
1690   for (i = 0; i < n; i++) {
1691     nz        = adiag[i] - ai[i];
1692     bi[i + 1] = bi[i] + nz;
1693     aj        = a->j + ai[i];
1694     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1695   }
1696 
1697   /* U part */
1698   bdiag[n] = bi[n] - 1;
1699   for (i = n - 1; i >= 0; i--) {
1700     nz = ai[i + 1] - adiag[i] - 1;
1701     aj = a->j + adiag[i] + 1;
1702     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1703     /* diag[i] */
1704     bj[k++]  = i;
1705     bdiag[i] = bdiag[i + 1] + nz + 1;
1706   }
1707 
1708   fact->factortype             = MAT_FACTOR_ILU;
1709   fact->info.factor_mallocs    = 0;
1710   fact->info.fill_ratio_given  = info->fill;
1711   fact->info.fill_ratio_needed = 1.0;
1712   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1713   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1714 
1715   b       = (Mat_SeqAIJ *)(fact)->data;
1716   b->row  = isrow;
1717   b->col  = iscol;
1718   b->icol = isicol;
1719   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1720   PetscCall(PetscObjectReference((PetscObject)isrow));
1721   PetscCall(PetscObjectReference((PetscObject)iscol));
1722   PetscFunctionReturn(PETSC_SUCCESS);
1723 }
1724 
1725 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1726 {
1727   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1728   IS                 isicol;
1729   const PetscInt    *r, *ic;
1730   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1731   PetscInt          *bi, *cols, nnz, *cols_lvl;
1732   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1733   PetscInt           i, levels, diagonal_fill;
1734   PetscBool          col_identity, row_identity, missing;
1735   PetscReal          f;
1736   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1737   PetscBT            lnkbt;
1738   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1739   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1740   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1741 
1742   PetscFunctionBegin;
1743   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1744   PetscCall(MatMissingDiagonal(A, &missing, &i));
1745   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1746 
1747   levels = (PetscInt)info->levels;
1748   PetscCall(ISIdentity(isrow, &row_identity));
1749   PetscCall(ISIdentity(iscol, &col_identity));
1750   if (!levels && row_identity && col_identity) {
1751     /* special case: ilu(0) with natural ordering */
1752     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1753     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1754     PetscFunctionReturn(PETSC_SUCCESS);
1755   }
1756 
1757   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1758   PetscCall(ISGetIndices(isrow, &r));
1759   PetscCall(ISGetIndices(isicol, &ic));
1760 
1761   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1762   PetscCall(PetscMalloc1(n + 1, &bi));
1763   PetscCall(PetscMalloc1(n + 1, &bdiag));
1764   bi[0] = bdiag[0] = 0;
1765   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1766 
1767   /* create a linked list for storing column indices of the active row */
1768   nlnk = n + 1;
1769   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1770 
1771   /* initial FreeSpace size is f*(ai[n]+1) */
1772   f             = info->fill;
1773   diagonal_fill = (PetscInt)info->diagonal_fill;
1774   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1775   current_space = free_space;
1776   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1777   current_space_lvl = free_space_lvl;
1778   for (i = 0; i < n; i++) {
1779     nzi = 0;
1780     /* copy current row into linked list */
1781     nnz = ai[r[i] + 1] - ai[r[i]];
1782     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1783     cols   = aj + ai[r[i]];
1784     lnk[i] = -1; /* marker to indicate if diagonal exists */
1785     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1786     nzi += nlnk;
1787 
1788     /* make sure diagonal entry is included */
1789     if (diagonal_fill && lnk[i] == -1) {
1790       fm = n;
1791       while (lnk[fm] < i) fm = lnk[fm];
1792       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1793       lnk[fm]    = i;
1794       lnk_lvl[i] = 0;
1795       nzi++;
1796       dcount++;
1797     }
1798 
1799     /* add pivot rows into the active row */
1800     nzbd = 0;
1801     prow = lnk[n];
1802     while (prow < i) {
1803       nnz      = bdiag[prow];
1804       cols     = bj_ptr[prow] + nnz + 1;
1805       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1806       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1807       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1808       nzi += nlnk;
1809       prow = lnk[prow];
1810       nzbd++;
1811     }
1812     bdiag[i]  = nzbd;
1813     bi[i + 1] = bi[i] + nzi;
1814     /* if free space is not available, make more free space */
1815     if (current_space->local_remaining < nzi) {
1816       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1817       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1818       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1819       reallocs++;
1820     }
1821 
1822     /* copy data into free_space and free_space_lvl, then initialize lnk */
1823     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1824     bj_ptr[i]    = current_space->array;
1825     bjlvl_ptr[i] = current_space_lvl->array;
1826 
1827     /* make sure the active row i has diagonal entry */
1828     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1829 
1830     current_space->array += nzi;
1831     current_space->local_used += nzi;
1832     current_space->local_remaining -= nzi;
1833     current_space_lvl->array += nzi;
1834     current_space_lvl->local_used += nzi;
1835     current_space_lvl->local_remaining -= nzi;
1836   }
1837 
1838   PetscCall(ISRestoreIndices(isrow, &r));
1839   PetscCall(ISRestoreIndices(isicol, &ic));
1840   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1841   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1842   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1843 
1844   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1845   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1846   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1847 
1848 #if defined(PETSC_USE_INFO)
1849   {
1850     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1851     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1852     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1853     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1854     PetscCall(PetscInfo(A, "for best performance.\n"));
1855     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1856   }
1857 #endif
1858   /* put together the new matrix */
1859   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1860   b = (Mat_SeqAIJ *)(fact)->data;
1861 
1862   b->free_a       = PETSC_TRUE;
1863   b->free_ij      = PETSC_TRUE;
1864   b->singlemalloc = PETSC_FALSE;
1865 
1866   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1867 
1868   b->j    = bj;
1869   b->i    = bi;
1870   b->diag = bdiag;
1871   b->ilen = NULL;
1872   b->imax = NULL;
1873   b->row  = isrow;
1874   b->col  = iscol;
1875   PetscCall(PetscObjectReference((PetscObject)isrow));
1876   PetscCall(PetscObjectReference((PetscObject)iscol));
1877   b->icol = isicol;
1878 
1879   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1880   /* In b structure:  Free imax, ilen, old a, old j.
1881      Allocate bdiag, solve_work, new a, new j */
1882   b->maxnz = b->nz = bdiag[0] + 1;
1883 
1884   (fact)->info.factor_mallocs    = reallocs;
1885   (fact)->info.fill_ratio_given  = f;
1886   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1887   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1888   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1889   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1890   PetscFunctionReturn(PETSC_SUCCESS);
1891 }
1892 
1893 #if 0
1894 // unused
1895 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1896 {
1897   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1898   IS                 isicol;
1899   const PetscInt    *r, *ic;
1900   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1901   PetscInt          *bi, *cols, nnz, *cols_lvl;
1902   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1903   PetscInt           i, levels, diagonal_fill;
1904   PetscBool          col_identity, row_identity;
1905   PetscReal          f;
1906   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1907   PetscBT            lnkbt;
1908   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1909   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1910   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1911   PetscBool          missing;
1912 
1913   PetscFunctionBegin;
1914   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1915   PetscCall(MatMissingDiagonal(A, &missing, &i));
1916   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1917 
1918   f             = info->fill;
1919   levels        = (PetscInt)info->levels;
1920   diagonal_fill = (PetscInt)info->diagonal_fill;
1921 
1922   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1923 
1924   PetscCall(ISIdentity(isrow, &row_identity));
1925   PetscCall(ISIdentity(iscol, &col_identity));
1926   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1927     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1928 
1929     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1930     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1931     fact->factortype               = MAT_FACTOR_ILU;
1932     (fact)->info.factor_mallocs    = 0;
1933     (fact)->info.fill_ratio_given  = info->fill;
1934     (fact)->info.fill_ratio_needed = 1.0;
1935 
1936     b       = (Mat_SeqAIJ *)(fact)->data;
1937     b->row  = isrow;
1938     b->col  = iscol;
1939     b->icol = isicol;
1940     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1941     PetscCall(PetscObjectReference((PetscObject)isrow));
1942     PetscCall(PetscObjectReference((PetscObject)iscol));
1943     PetscFunctionReturn(PETSC_SUCCESS);
1944   }
1945 
1946   PetscCall(ISGetIndices(isrow, &r));
1947   PetscCall(ISGetIndices(isicol, &ic));
1948 
1949   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1950   PetscCall(PetscMalloc1(n + 1, &bi));
1951   PetscCall(PetscMalloc1(n + 1, &bdiag));
1952   bi[0] = bdiag[0] = 0;
1953 
1954   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1955 
1956   /* create a linked list for storing column indices of the active row */
1957   nlnk = n + 1;
1958   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1959 
1960   /* initial FreeSpace size is f*(ai[n]+1) */
1961   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1962   current_space = free_space;
1963   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1964   current_space_lvl = free_space_lvl;
1965 
1966   for (i = 0; i < n; i++) {
1967     nzi = 0;
1968     /* copy current row into linked list */
1969     nnz = ai[r[i] + 1] - ai[r[i]];
1970     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1971     cols   = aj + ai[r[i]];
1972     lnk[i] = -1; /* marker to indicate if diagonal exists */
1973     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1974     nzi += nlnk;
1975 
1976     /* make sure diagonal entry is included */
1977     if (diagonal_fill && lnk[i] == -1) {
1978       fm = n;
1979       while (lnk[fm] < i) fm = lnk[fm];
1980       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1981       lnk[fm]    = i;
1982       lnk_lvl[i] = 0;
1983       nzi++;
1984       dcount++;
1985     }
1986 
1987     /* add pivot rows into the active row */
1988     nzbd = 0;
1989     prow = lnk[n];
1990     while (prow < i) {
1991       nnz      = bdiag[prow];
1992       cols     = bj_ptr[prow] + nnz + 1;
1993       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1994       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1995       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1996       nzi += nlnk;
1997       prow = lnk[prow];
1998       nzbd++;
1999     }
2000     bdiag[i]  = nzbd;
2001     bi[i + 1] = bi[i] + nzi;
2002 
2003     /* if free space is not available, make more free space */
2004     if (current_space->local_remaining < nzi) {
2005       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
2006       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2007       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
2008       reallocs++;
2009     }
2010 
2011     /* copy data into free_space and free_space_lvl, then initialize lnk */
2012     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2013     bj_ptr[i]    = current_space->array;
2014     bjlvl_ptr[i] = current_space_lvl->array;
2015 
2016     /* make sure the active row i has diagonal entry */
2017     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
2018 
2019     current_space->array += nzi;
2020     current_space->local_used += nzi;
2021     current_space->local_remaining -= nzi;
2022     current_space_lvl->array += nzi;
2023     current_space_lvl->local_used += nzi;
2024     current_space_lvl->local_remaining -= nzi;
2025   }
2026 
2027   PetscCall(ISRestoreIndices(isrow, &r));
2028   PetscCall(ISRestoreIndices(isicol, &ic));
2029 
2030   /* destroy list of free space and other temporary arrays */
2031   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
2032   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2033   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2034   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2035   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
2036 
2037   #if defined(PETSC_USE_INFO)
2038   {
2039     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2040     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2041     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2042     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2043     PetscCall(PetscInfo(A, "for best performance.\n"));
2044     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2045   }
2046   #endif
2047 
2048   /* put together the new matrix */
2049   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2050   b = (Mat_SeqAIJ *)(fact)->data;
2051 
2052   b->free_a       = PETSC_TRUE;
2053   b->free_ij      = PETSC_TRUE;
2054   b->singlemalloc = PETSC_FALSE;
2055 
2056   PetscCall(PetscMalloc1(bi[n], &b->a));
2057   b->j = bj;
2058   b->i = bi;
2059   for (i = 0; i < n; i++) bdiag[i] += bi[i];
2060   b->diag = bdiag;
2061   b->ilen = NULL;
2062   b->imax = NULL;
2063   b->row  = isrow;
2064   b->col  = iscol;
2065   PetscCall(PetscObjectReference((PetscObject)isrow));
2066   PetscCall(PetscObjectReference((PetscObject)iscol));
2067   b->icol = isicol;
2068   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2069   /* In b structure:  Free imax, ilen, old a, old j.
2070      Allocate bdiag, solve_work, new a, new j */
2071   b->maxnz = b->nz = bi[n];
2072 
2073   (fact)->info.factor_mallocs    = reallocs;
2074   (fact)->info.fill_ratio_given  = f;
2075   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2076   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2077   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 #endif
2081 
2082 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2083 {
2084   Mat             C  = B;
2085   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2086   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2087   IS              ip = b->row, iip = b->icol;
2088   const PetscInt *rip, *riip;
2089   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2090   PetscInt       *ai = a->i, *aj = a->j;
2091   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2092   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2093   PetscBool       perm_identity;
2094   FactorShiftCtx  sctx;
2095   PetscReal       rs;
2096   MatScalar       d, *v;
2097 
2098   PetscFunctionBegin;
2099   /* MatPivotSetUp(): initialize shift context sctx */
2100   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2101 
2102   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2103     sctx.shift_top = info->zeropivot;
2104     for (i = 0; i < mbs; i++) {
2105       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2106       d  = (aa)[a->diag[i]];
2107       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2108       v  = aa + ai[i];
2109       nz = ai[i + 1] - ai[i];
2110       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2111       if (rs > sctx.shift_top) sctx.shift_top = rs;
2112     }
2113     sctx.shift_top *= 1.1;
2114     sctx.nshift_max = 5;
2115     sctx.shift_lo   = 0.;
2116     sctx.shift_hi   = 1.;
2117   }
2118 
2119   PetscCall(ISGetIndices(ip, &rip));
2120   PetscCall(ISGetIndices(iip, &riip));
2121 
2122   /* allocate working arrays
2123      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2124      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2125   */
2126   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2127 
2128   do {
2129     sctx.newshift = PETSC_FALSE;
2130 
2131     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2132     if (mbs) il[0] = 0;
2133 
2134     for (k = 0; k < mbs; k++) {
2135       /* zero rtmp */
2136       nz    = bi[k + 1] - bi[k];
2137       bjtmp = bj + bi[k];
2138       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2139 
2140       /* load in initial unfactored row */
2141       bval = ba + bi[k];
2142       jmin = ai[rip[k]];
2143       jmax = ai[rip[k] + 1];
2144       for (j = jmin; j < jmax; j++) {
2145         col = riip[aj[j]];
2146         if (col >= k) { /* only take upper triangular entry */
2147           rtmp[col] = aa[j];
2148           *bval++   = 0.0; /* for in-place factorization */
2149         }
2150       }
2151       /* shift the diagonal of the matrix: ZeropivotApply() */
2152       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2153 
2154       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155       dk = rtmp[k];
2156       i  = c2r[k]; /* first row to be added to k_th row  */
2157 
2158       while (i < k) {
2159         nexti = c2r[i]; /* next row to be added to k_th row */
2160 
2161         /* compute multiplier, update diag(k) and U(i,k) */
2162         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2163         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2164         dk += uikdi * ba[ili];           /* update diag[k] */
2165         ba[ili] = uikdi;                 /* -U(i,k) */
2166 
2167         /* add multiple of row i to k-th row */
2168         jmin = ili + 1;
2169         jmax = bi[i + 1];
2170         if (jmin < jmax) {
2171           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2172           /* update il and c2r for row i */
2173           il[i]  = jmin;
2174           j      = bj[jmin];
2175           c2r[i] = c2r[j];
2176           c2r[j] = i;
2177         }
2178         i = nexti;
2179       }
2180 
2181       /* copy data into U(k,:) */
2182       rs   = 0.0;
2183       jmin = bi[k];
2184       jmax = bi[k + 1] - 1;
2185       if (jmin < jmax) {
2186         for (j = jmin; j < jmax; j++) {
2187           col   = bj[j];
2188           ba[j] = rtmp[col];
2189           rs += PetscAbsScalar(ba[j]);
2190         }
2191         /* add the k-th row into il and c2r */
2192         il[k]  = jmin;
2193         i      = bj[jmin];
2194         c2r[k] = c2r[i];
2195         c2r[i] = k;
2196       }
2197 
2198       /* MatPivotCheck() */
2199       sctx.rs = rs;
2200       sctx.pv = dk;
2201       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2202       if (sctx.newshift) break;
2203       dk = sctx.pv;
2204 
2205       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2206     }
2207   } while (sctx.newshift);
2208 
2209   PetscCall(PetscFree3(rtmp, il, c2r));
2210   PetscCall(ISRestoreIndices(ip, &rip));
2211   PetscCall(ISRestoreIndices(iip, &riip));
2212 
2213   PetscCall(ISIdentity(ip, &perm_identity));
2214   if (perm_identity) {
2215     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2216     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2217     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2218     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2219   } else {
2220     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2221     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2222     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2223     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2224   }
2225 
2226   C->assembled    = PETSC_TRUE;
2227   C->preallocated = PETSC_TRUE;
2228 
2229   PetscCall(PetscLogFlops(C->rmap->n));
2230 
2231   /* MatPivotView() */
2232   if (sctx.nshift) {
2233     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2234       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2235     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2236       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2237     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2238       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2239     }
2240   }
2241   PetscFunctionReturn(PETSC_SUCCESS);
2242 }
2243 
2244 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2245 {
2246   Mat             C  = B;
2247   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2248   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2249   IS              ip = b->row, iip = b->icol;
2250   const PetscInt *rip, *riip;
2251   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2252   PetscInt       *ai = a->i, *aj = a->j;
2253   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2254   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2255   PetscBool       perm_identity;
2256   FactorShiftCtx  sctx;
2257   PetscReal       rs;
2258   MatScalar       d, *v;
2259 
2260   PetscFunctionBegin;
2261   /* MatPivotSetUp(): initialize shift context sctx */
2262   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2263 
2264   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2265     sctx.shift_top = info->zeropivot;
2266     for (i = 0; i < mbs; i++) {
2267       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2268       d  = (aa)[a->diag[i]];
2269       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2270       v  = aa + ai[i];
2271       nz = ai[i + 1] - ai[i];
2272       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2273       if (rs > sctx.shift_top) sctx.shift_top = rs;
2274     }
2275     sctx.shift_top *= 1.1;
2276     sctx.nshift_max = 5;
2277     sctx.shift_lo   = 0.;
2278     sctx.shift_hi   = 1.;
2279   }
2280 
2281   PetscCall(ISGetIndices(ip, &rip));
2282   PetscCall(ISGetIndices(iip, &riip));
2283 
2284   /* initialization */
2285   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2286 
2287   do {
2288     sctx.newshift = PETSC_FALSE;
2289 
2290     for (i = 0; i < mbs; i++) jl[i] = mbs;
2291     il[0] = 0;
2292 
2293     for (k = 0; k < mbs; k++) {
2294       /* zero rtmp */
2295       nz    = bi[k + 1] - bi[k];
2296       bjtmp = bj + bi[k];
2297       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2298 
2299       bval = ba + bi[k];
2300       /* initialize k-th row by the perm[k]-th row of A */
2301       jmin = ai[rip[k]];
2302       jmax = ai[rip[k] + 1];
2303       for (j = jmin; j < jmax; j++) {
2304         col = riip[aj[j]];
2305         if (col >= k) { /* only take upper triangular entry */
2306           rtmp[col] = aa[j];
2307           *bval++   = 0.0; /* for in-place factorization */
2308         }
2309       }
2310       /* shift the diagonal of the matrix */
2311       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2312 
2313       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2314       dk = rtmp[k];
2315       i  = jl[k]; /* first row to be added to k_th row  */
2316 
2317       while (i < k) {
2318         nexti = jl[i]; /* next row to be added to k_th row */
2319 
2320         /* compute multiplier, update diag(k) and U(i,k) */
2321         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2322         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2323         dk += uikdi * ba[ili];
2324         ba[ili] = uikdi; /* -U(i,k) */
2325 
2326         /* add multiple of row i to k-th row */
2327         jmin = ili + 1;
2328         jmax = bi[i + 1];
2329         if (jmin < jmax) {
2330           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2331           /* update il and jl for row i */
2332           il[i] = jmin;
2333           j     = bj[jmin];
2334           jl[i] = jl[j];
2335           jl[j] = i;
2336         }
2337         i = nexti;
2338       }
2339 
2340       /* shift the diagonals when zero pivot is detected */
2341       /* compute rs=sum of abs(off-diagonal) */
2342       rs   = 0.0;
2343       jmin = bi[k] + 1;
2344       nz   = bi[k + 1] - jmin;
2345       bcol = bj + jmin;
2346       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2347 
2348       sctx.rs = rs;
2349       sctx.pv = dk;
2350       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2351       if (sctx.newshift) break;
2352       dk = sctx.pv;
2353 
2354       /* copy data into U(k,:) */
2355       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2356       jmin      = bi[k] + 1;
2357       jmax      = bi[k + 1];
2358       if (jmin < jmax) {
2359         for (j = jmin; j < jmax; j++) {
2360           col   = bj[j];
2361           ba[j] = rtmp[col];
2362         }
2363         /* add the k-th row into il and jl */
2364         il[k] = jmin;
2365         i     = bj[jmin];
2366         jl[k] = jl[i];
2367         jl[i] = k;
2368       }
2369     }
2370   } while (sctx.newshift);
2371 
2372   PetscCall(PetscFree3(rtmp, il, jl));
2373   PetscCall(ISRestoreIndices(ip, &rip));
2374   PetscCall(ISRestoreIndices(iip, &riip));
2375 
2376   PetscCall(ISIdentity(ip, &perm_identity));
2377   if (perm_identity) {
2378     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2379     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2380     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2381     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2382   } else {
2383     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2384     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2385     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2386     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2387   }
2388 
2389   C->assembled    = PETSC_TRUE;
2390   C->preallocated = PETSC_TRUE;
2391 
2392   PetscCall(PetscLogFlops(C->rmap->n));
2393   if (sctx.nshift) {
2394     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2395       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2396     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2397       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2398     }
2399   }
2400   PetscFunctionReturn(PETSC_SUCCESS);
2401 }
2402 
2403 /*
2404    icc() under revised new data structure.
2405    Factored arrays bj and ba are stored as
2406      U(0,:),...,U(i,:),U(n-1,:)
2407 
2408    ui=fact->i is an array of size n+1, in which
2409    ui+
2410      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2411      ui[n]:  points to U(n-1,n-1)+1
2412 
2413   udiag=fact->diag is an array of size n,in which
2414      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2415 
2416    U(i,:) contains udiag[i] as its last entry, i.e.,
2417     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2418 */
2419 
2420 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2421 {
2422   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2423   Mat_SeqSBAIJ      *b;
2424   PetscBool          perm_identity, missing;
2425   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2426   const PetscInt    *rip, *riip;
2427   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2428   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2429   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2430   PetscReal          fill = info->fill, levels = info->levels;
2431   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2432   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2433   PetscBT            lnkbt;
2434   IS                 iperm;
2435 
2436   PetscFunctionBegin;
2437   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2438   PetscCall(MatMissingDiagonal(A, &missing, &d));
2439   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2440   PetscCall(ISIdentity(perm, &perm_identity));
2441   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2442 
2443   PetscCall(PetscMalloc1(am + 1, &ui));
2444   PetscCall(PetscMalloc1(am + 1, &udiag));
2445   ui[0] = 0;
2446 
2447   /* ICC(0) without matrix ordering: simply rearrange column indices */
2448   if (!levels && perm_identity) {
2449     for (i = 0; i < am; i++) {
2450       ncols     = ai[i + 1] - a->diag[i];
2451       ui[i + 1] = ui[i] + ncols;
2452       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2453     }
2454     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2455     cols = uj;
2456     for (i = 0; i < am; i++) {
2457       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2458       ncols = ai[i + 1] - a->diag[i] - 1;
2459       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2460       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2461     }
2462   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2463     PetscCall(ISGetIndices(iperm, &riip));
2464     PetscCall(ISGetIndices(perm, &rip));
2465 
2466     /* initialization */
2467     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2468 
2469     /* jl: linked list for storing indices of the pivot rows
2470        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2471     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2472     for (i = 0; i < am; i++) {
2473       jl[i] = am;
2474       il[i] = 0;
2475     }
2476 
2477     /* create and initialize a linked list for storing column indices of the active row k */
2478     nlnk = am + 1;
2479     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2480 
2481     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2482     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2483     current_space = free_space;
2484     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2485     current_space_lvl = free_space_lvl;
2486 
2487     for (k = 0; k < am; k++) { /* for each active row k */
2488       /* initialize lnk by the column indices of row rip[k] of A */
2489       nzk   = 0;
2490       ncols = ai[rip[k] + 1] - ai[rip[k]];
2491       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2492       ncols_upper = 0;
2493       for (j = 0; j < ncols; j++) {
2494         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2495         if (riip[i] >= k) {         /* only take upper triangular entry */
2496           ajtmp[ncols_upper] = i;
2497           ncols_upper++;
2498         }
2499       }
2500       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2501       nzk += nlnk;
2502 
2503       /* update lnk by computing fill-in for each pivot row to be merged in */
2504       prow = jl[k]; /* 1st pivot row */
2505 
2506       while (prow < k) {
2507         nextprow = jl[prow];
2508 
2509         /* merge prow into k-th row */
2510         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2511         jmax  = ui[prow + 1];
2512         ncols = jmax - jmin;
2513         i     = jmin - ui[prow];
2514         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2515         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2516         j     = *(uj - 1);
2517         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2518         nzk += nlnk;
2519 
2520         /* update il and jl for prow */
2521         if (jmin < jmax) {
2522           il[prow] = jmin;
2523           j        = *cols;
2524           jl[prow] = jl[j];
2525           jl[j]    = prow;
2526         }
2527         prow = nextprow;
2528       }
2529 
2530       /* if free space is not available, make more free space */
2531       if (current_space->local_remaining < nzk) {
2532         i = am - k + 1;                                    /* num of unfactored rows */
2533         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2534         PetscCall(PetscFreeSpaceGet(i, &current_space));
2535         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2536         reallocs++;
2537       }
2538 
2539       /* copy data into free_space and free_space_lvl, then initialize lnk */
2540       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2541       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2542 
2543       /* add the k-th row into il and jl */
2544       if (nzk > 1) {
2545         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2546         jl[k] = jl[i];
2547         jl[i] = k;
2548         il[k] = ui[k] + 1;
2549       }
2550       uj_ptr[k]     = current_space->array;
2551       uj_lvl_ptr[k] = current_space_lvl->array;
2552 
2553       current_space->array += nzk;
2554       current_space->local_used += nzk;
2555       current_space->local_remaining -= nzk;
2556 
2557       current_space_lvl->array += nzk;
2558       current_space_lvl->local_used += nzk;
2559       current_space_lvl->local_remaining -= nzk;
2560 
2561       ui[k + 1] = ui[k] + nzk;
2562     }
2563 
2564     PetscCall(ISRestoreIndices(perm, &rip));
2565     PetscCall(ISRestoreIndices(iperm, &riip));
2566     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2567     PetscCall(PetscFree(ajtmp));
2568 
2569     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2570     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2571     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2572     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2573     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2574 
2575   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2576 
2577   /* put together the new matrix in MATSEQSBAIJ format */
2578   b               = (Mat_SeqSBAIJ *)(fact)->data;
2579   b->singlemalloc = PETSC_FALSE;
2580 
2581   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2582 
2583   b->j         = uj;
2584   b->i         = ui;
2585   b->diag      = udiag;
2586   b->free_diag = PETSC_TRUE;
2587   b->ilen      = NULL;
2588   b->imax      = NULL;
2589   b->row       = perm;
2590   b->col       = perm;
2591   PetscCall(PetscObjectReference((PetscObject)perm));
2592   PetscCall(PetscObjectReference((PetscObject)perm));
2593   b->icol          = iperm;
2594   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2595 
2596   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2597 
2598   b->maxnz = b->nz = ui[am];
2599   b->free_a        = PETSC_TRUE;
2600   b->free_ij       = PETSC_TRUE;
2601 
2602   fact->info.factor_mallocs   = reallocs;
2603   fact->info.fill_ratio_given = fill;
2604   if (ai[am] != 0) {
2605     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2606     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2607   } else {
2608     fact->info.fill_ratio_needed = 0.0;
2609   }
2610 #if defined(PETSC_USE_INFO)
2611   if (ai[am] != 0) {
2612     PetscReal af = fact->info.fill_ratio_needed;
2613     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2614     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2615     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2616   } else {
2617     PetscCall(PetscInfo(A, "Empty matrix\n"));
2618   }
2619 #endif
2620   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2621   PetscFunctionReturn(PETSC_SUCCESS);
2622 }
2623 
2624 #if 0
2625 // unused
2626 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2627 {
2628   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2629   Mat_SeqSBAIJ      *b;
2630   PetscBool          perm_identity, missing;
2631   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2632   const PetscInt    *rip, *riip;
2633   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2634   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2635   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2636   PetscReal          fill = info->fill, levels = info->levels;
2637   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2638   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2639   PetscBT            lnkbt;
2640   IS                 iperm;
2641 
2642   PetscFunctionBegin;
2643   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2644   PetscCall(MatMissingDiagonal(A, &missing, &d));
2645   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2646   PetscCall(ISIdentity(perm, &perm_identity));
2647   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2648 
2649   PetscCall(PetscMalloc1(am + 1, &ui));
2650   PetscCall(PetscMalloc1(am + 1, &udiag));
2651   ui[0] = 0;
2652 
2653   /* ICC(0) without matrix ordering: simply copies fill pattern */
2654   if (!levels && perm_identity) {
2655     for (i = 0; i < am; i++) {
2656       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2657       udiag[i]  = ui[i];
2658     }
2659     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2660     cols = uj;
2661     for (i = 0; i < am; i++) {
2662       aj    = a->j + a->diag[i];
2663       ncols = ui[i + 1] - ui[i];
2664       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2665     }
2666   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2667     PetscCall(ISGetIndices(iperm, &riip));
2668     PetscCall(ISGetIndices(perm, &rip));
2669 
2670     /* initialization */
2671     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2672 
2673     /* jl: linked list for storing indices of the pivot rows
2674        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2675     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2676     for (i = 0; i < am; i++) {
2677       jl[i] = am;
2678       il[i] = 0;
2679     }
2680 
2681     /* create and initialize a linked list for storing column indices of the active row k */
2682     nlnk = am + 1;
2683     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2684 
2685     /* initial FreeSpace size is fill*(ai[am]+1) */
2686     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2687     current_space = free_space;
2688     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2689     current_space_lvl = free_space_lvl;
2690 
2691     for (k = 0; k < am; k++) { /* for each active row k */
2692       /* initialize lnk by the column indices of row rip[k] of A */
2693       nzk   = 0;
2694       ncols = ai[rip[k] + 1] - ai[rip[k]];
2695       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2696       ncols_upper = 0;
2697       for (j = 0; j < ncols; j++) {
2698         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2699         if (riip[i] >= k) {         /* only take upper triangular entry */
2700           ajtmp[ncols_upper] = i;
2701           ncols_upper++;
2702         }
2703       }
2704       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2705       nzk += nlnk;
2706 
2707       /* update lnk by computing fill-in for each pivot row to be merged in */
2708       prow = jl[k]; /* 1st pivot row */
2709 
2710       while (prow < k) {
2711         nextprow = jl[prow];
2712 
2713         /* merge prow into k-th row */
2714         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2715         jmax  = ui[prow + 1];
2716         ncols = jmax - jmin;
2717         i     = jmin - ui[prow];
2718         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2719         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2720         j     = *(uj - 1);
2721         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2722         nzk += nlnk;
2723 
2724         /* update il and jl for prow */
2725         if (jmin < jmax) {
2726           il[prow] = jmin;
2727           j        = *cols;
2728           jl[prow] = jl[j];
2729           jl[j]    = prow;
2730         }
2731         prow = nextprow;
2732       }
2733 
2734       /* if free space is not available, make more free space */
2735       if (current_space->local_remaining < nzk) {
2736         i = am - k + 1;                                    /* num of unfactored rows */
2737         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2738         PetscCall(PetscFreeSpaceGet(i, &current_space));
2739         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2740         reallocs++;
2741       }
2742 
2743       /* copy data into free_space and free_space_lvl, then initialize lnk */
2744       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2745       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2746 
2747       /* add the k-th row into il and jl */
2748       if (nzk > 1) {
2749         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2750         jl[k] = jl[i];
2751         jl[i] = k;
2752         il[k] = ui[k] + 1;
2753       }
2754       uj_ptr[k]     = current_space->array;
2755       uj_lvl_ptr[k] = current_space_lvl->array;
2756 
2757       current_space->array += nzk;
2758       current_space->local_used += nzk;
2759       current_space->local_remaining -= nzk;
2760 
2761       current_space_lvl->array += nzk;
2762       current_space_lvl->local_used += nzk;
2763       current_space_lvl->local_remaining -= nzk;
2764 
2765       ui[k + 1] = ui[k] + nzk;
2766     }
2767 
2768   #if defined(PETSC_USE_INFO)
2769     if (ai[am] != 0) {
2770       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2771       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2772       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2773       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2774     } else {
2775       PetscCall(PetscInfo(A, "Empty matrix\n"));
2776     }
2777   #endif
2778 
2779     PetscCall(ISRestoreIndices(perm, &rip));
2780     PetscCall(ISRestoreIndices(iperm, &riip));
2781     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2782     PetscCall(PetscFree(ajtmp));
2783 
2784     /* destroy list of free space and other temporary array(s) */
2785     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2786     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2787     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2788     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2789 
2790   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2791 
2792   /* put together the new matrix in MATSEQSBAIJ format */
2793 
2794   b               = (Mat_SeqSBAIJ *)fact->data;
2795   b->singlemalloc = PETSC_FALSE;
2796 
2797   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2798 
2799   b->j         = uj;
2800   b->i         = ui;
2801   b->diag      = udiag;
2802   b->free_diag = PETSC_TRUE;
2803   b->ilen      = NULL;
2804   b->imax      = NULL;
2805   b->row       = perm;
2806   b->col       = perm;
2807 
2808   PetscCall(PetscObjectReference((PetscObject)perm));
2809   PetscCall(PetscObjectReference((PetscObject)perm));
2810 
2811   b->icol          = iperm;
2812   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2813   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2814   b->maxnz = b->nz = ui[am];
2815   b->free_a        = PETSC_TRUE;
2816   b->free_ij       = PETSC_TRUE;
2817 
2818   fact->info.factor_mallocs   = reallocs;
2819   fact->info.fill_ratio_given = fill;
2820   if (ai[am] != 0) {
2821     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2822   } else {
2823     fact->info.fill_ratio_needed = 0.0;
2824   }
2825   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2826   PetscFunctionReturn(PETSC_SUCCESS);
2827 }
2828 #endif
2829 
2830 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2831 {
2832   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2833   Mat_SeqSBAIJ      *b;
2834   PetscBool          perm_identity, missing;
2835   PetscReal          fill = info->fill;
2836   const PetscInt    *rip, *riip;
2837   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2838   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2839   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2840   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2841   PetscBT            lnkbt;
2842   IS                 iperm;
2843 
2844   PetscFunctionBegin;
2845   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2846   PetscCall(MatMissingDiagonal(A, &missing, &i));
2847   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2848 
2849   /* check whether perm is the identity mapping */
2850   PetscCall(ISIdentity(perm, &perm_identity));
2851   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2852   PetscCall(ISGetIndices(iperm, &riip));
2853   PetscCall(ISGetIndices(perm, &rip));
2854 
2855   /* initialization */
2856   PetscCall(PetscMalloc1(am + 1, &ui));
2857   PetscCall(PetscMalloc1(am + 1, &udiag));
2858   ui[0] = 0;
2859 
2860   /* jl: linked list for storing indices of the pivot rows
2861      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2862   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2863   for (i = 0; i < am; i++) {
2864     jl[i] = am;
2865     il[i] = 0;
2866   }
2867 
2868   /* create and initialize a linked list for storing column indices of the active row k */
2869   nlnk = am + 1;
2870   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2871 
2872   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2873   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2874   current_space = free_space;
2875 
2876   for (k = 0; k < am; k++) { /* for each active row k */
2877     /* initialize lnk by the column indices of row rip[k] of A */
2878     nzk   = 0;
2879     ncols = ai[rip[k] + 1] - ai[rip[k]];
2880     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2881     ncols_upper = 0;
2882     for (j = 0; j < ncols; j++) {
2883       i = riip[*(aj + ai[rip[k]] + j)];
2884       if (i >= k) { /* only take upper triangular entry */
2885         cols[ncols_upper] = i;
2886         ncols_upper++;
2887       }
2888     }
2889     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2890     nzk += nlnk;
2891 
2892     /* update lnk by computing fill-in for each pivot row to be merged in */
2893     prow = jl[k]; /* 1st pivot row */
2894 
2895     while (prow < k) {
2896       nextprow = jl[prow];
2897       /* merge prow into k-th row */
2898       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2899       jmax   = ui[prow + 1];
2900       ncols  = jmax - jmin;
2901       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2902       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2903       nzk += nlnk;
2904 
2905       /* update il and jl for prow */
2906       if (jmin < jmax) {
2907         il[prow] = jmin;
2908         j        = *uj_ptr;
2909         jl[prow] = jl[j];
2910         jl[j]    = prow;
2911       }
2912       prow = nextprow;
2913     }
2914 
2915     /* if free space is not available, make more free space */
2916     if (current_space->local_remaining < nzk) {
2917       i = am - k + 1;                                    /* num of unfactored rows */
2918       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2919       PetscCall(PetscFreeSpaceGet(i, &current_space));
2920       reallocs++;
2921     }
2922 
2923     /* copy data into free space, then initialize lnk */
2924     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2925 
2926     /* add the k-th row into il and jl */
2927     if (nzk > 1) {
2928       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2929       jl[k] = jl[i];
2930       jl[i] = k;
2931       il[k] = ui[k] + 1;
2932     }
2933     ui_ptr[k] = current_space->array;
2934 
2935     current_space->array += nzk;
2936     current_space->local_used += nzk;
2937     current_space->local_remaining -= nzk;
2938 
2939     ui[k + 1] = ui[k] + nzk;
2940   }
2941 
2942   PetscCall(ISRestoreIndices(perm, &rip));
2943   PetscCall(ISRestoreIndices(iperm, &riip));
2944   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2945 
2946   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2947   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2948   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2949   PetscCall(PetscLLDestroy(lnk, lnkbt));
2950 
2951   /* put together the new matrix in MATSEQSBAIJ format */
2952 
2953   b               = (Mat_SeqSBAIJ *)fact->data;
2954   b->singlemalloc = PETSC_FALSE;
2955   b->free_a       = PETSC_TRUE;
2956   b->free_ij      = PETSC_TRUE;
2957 
2958   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2959 
2960   b->j         = uj;
2961   b->i         = ui;
2962   b->diag      = udiag;
2963   b->free_diag = PETSC_TRUE;
2964   b->ilen      = NULL;
2965   b->imax      = NULL;
2966   b->row       = perm;
2967   b->col       = perm;
2968 
2969   PetscCall(PetscObjectReference((PetscObject)perm));
2970   PetscCall(PetscObjectReference((PetscObject)perm));
2971 
2972   b->icol          = iperm;
2973   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2974 
2975   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2976 
2977   b->maxnz = b->nz = ui[am];
2978 
2979   fact->info.factor_mallocs   = reallocs;
2980   fact->info.fill_ratio_given = fill;
2981   if (ai[am] != 0) {
2982     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2983     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2984   } else {
2985     fact->info.fill_ratio_needed = 0.0;
2986   }
2987 #if defined(PETSC_USE_INFO)
2988   if (ai[am] != 0) {
2989     PetscReal af = fact->info.fill_ratio_needed;
2990     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2991     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2992     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2993   } else {
2994     PetscCall(PetscInfo(A, "Empty matrix\n"));
2995   }
2996 #endif
2997   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2998   PetscFunctionReturn(PETSC_SUCCESS);
2999 }
3000 
3001 #if 0
3002 // unused
3003 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
3004 {
3005   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3006   Mat_SeqSBAIJ      *b;
3007   PetscBool          perm_identity, missing;
3008   PetscReal          fill = info->fill;
3009   const PetscInt    *rip, *riip;
3010   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
3011   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
3012   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
3013   PetscFreeSpaceList free_space = NULL, current_space = NULL;
3014   PetscBT            lnkbt;
3015   IS                 iperm;
3016 
3017   PetscFunctionBegin;
3018   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
3019   PetscCall(MatMissingDiagonal(A, &missing, &i));
3020   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3021 
3022   /* check whether perm is the identity mapping */
3023   PetscCall(ISIdentity(perm, &perm_identity));
3024   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3025   PetscCall(ISGetIndices(iperm, &riip));
3026   PetscCall(ISGetIndices(perm, &rip));
3027 
3028   /* initialization */
3029   PetscCall(PetscMalloc1(am + 1, &ui));
3030   ui[0] = 0;
3031 
3032   /* jl: linked list for storing indices of the pivot rows
3033      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3034   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3035   for (i = 0; i < am; i++) {
3036     jl[i] = am;
3037     il[i] = 0;
3038   }
3039 
3040   /* create and initialize a linked list for storing column indices of the active row k */
3041   nlnk = am + 1;
3042   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
3043 
3044   /* initial FreeSpace size is fill*(ai[am]+1) */
3045   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3046   current_space = free_space;
3047 
3048   for (k = 0; k < am; k++) { /* for each active row k */
3049     /* initialize lnk by the column indices of row rip[k] of A */
3050     nzk   = 0;
3051     ncols = ai[rip[k] + 1] - ai[rip[k]];
3052     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3053     ncols_upper = 0;
3054     for (j = 0; j < ncols; j++) {
3055       i = riip[*(aj + ai[rip[k]] + j)];
3056       if (i >= k) { /* only take upper triangular entry */
3057         cols[ncols_upper] = i;
3058         ncols_upper++;
3059       }
3060     }
3061     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3062     nzk += nlnk;
3063 
3064     /* update lnk by computing fill-in for each pivot row to be merged in */
3065     prow = jl[k]; /* 1st pivot row */
3066 
3067     while (prow < k) {
3068       nextprow = jl[prow];
3069       /* merge prow into k-th row */
3070       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3071       jmax   = ui[prow + 1];
3072       ncols  = jmax - jmin;
3073       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3074       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3075       nzk += nlnk;
3076 
3077       /* update il and jl for prow */
3078       if (jmin < jmax) {
3079         il[prow] = jmin;
3080         j        = *uj_ptr;
3081         jl[prow] = jl[j];
3082         jl[j]    = prow;
3083       }
3084       prow = nextprow;
3085     }
3086 
3087     /* if free space is not available, make more free space */
3088     if (current_space->local_remaining < nzk) {
3089       i = am - k + 1;                     /* num of unfactored rows */
3090       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3091       PetscCall(PetscFreeSpaceGet(i, &current_space));
3092       reallocs++;
3093     }
3094 
3095     /* copy data into free space, then initialize lnk */
3096     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3097 
3098     /* add the k-th row into il and jl */
3099     if (nzk - 1 > 0) {
3100       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3101       jl[k] = jl[i];
3102       jl[i] = k;
3103       il[k] = ui[k] + 1;
3104     }
3105     ui_ptr[k] = current_space->array;
3106 
3107     current_space->array += nzk;
3108     current_space->local_used += nzk;
3109     current_space->local_remaining -= nzk;
3110 
3111     ui[k + 1] = ui[k] + nzk;
3112   }
3113 
3114   #if defined(PETSC_USE_INFO)
3115   if (ai[am] != 0) {
3116     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3117     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3118     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3119     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3120   } else {
3121     PetscCall(PetscInfo(A, "Empty matrix\n"));
3122   }
3123   #endif
3124 
3125   PetscCall(ISRestoreIndices(perm, &rip));
3126   PetscCall(ISRestoreIndices(iperm, &riip));
3127   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3128 
3129   /* destroy list of free space and other temporary array(s) */
3130   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3131   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3132   PetscCall(PetscLLDestroy(lnk, lnkbt));
3133 
3134   /* put together the new matrix in MATSEQSBAIJ format */
3135 
3136   b               = (Mat_SeqSBAIJ *)fact->data;
3137   b->singlemalloc = PETSC_FALSE;
3138   b->free_a       = PETSC_TRUE;
3139   b->free_ij      = PETSC_TRUE;
3140 
3141   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3142 
3143   b->j    = uj;
3144   b->i    = ui;
3145   b->diag = NULL;
3146   b->ilen = NULL;
3147   b->imax = NULL;
3148   b->row  = perm;
3149   b->col  = perm;
3150 
3151   PetscCall(PetscObjectReference((PetscObject)perm));
3152   PetscCall(PetscObjectReference((PetscObject)perm));
3153 
3154   b->icol          = iperm;
3155   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3156 
3157   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3158   b->maxnz = b->nz = ui[am];
3159 
3160   fact->info.factor_mallocs   = reallocs;
3161   fact->info.fill_ratio_given = fill;
3162   if (ai[am] != 0) {
3163     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3164   } else {
3165     fact->info.fill_ratio_needed = 0.0;
3166   }
3167   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3168   PetscFunctionReturn(PETSC_SUCCESS);
3169 }
3170 #endif
3171 
3172 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3173 {
3174   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3175   PetscInt           n  = A->rmap->n;
3176   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3177   PetscScalar       *x, sum;
3178   const PetscScalar *b;
3179   const MatScalar   *aa = a->a, *v;
3180   PetscInt           i, nz;
3181 
3182   PetscFunctionBegin;
3183   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3184 
3185   PetscCall(VecGetArrayRead(bb, &b));
3186   PetscCall(VecGetArrayWrite(xx, &x));
3187 
3188   /* forward solve the lower triangular */
3189   x[0] = b[0];
3190   v    = aa;
3191   vi   = aj;
3192   for (i = 1; i < n; i++) {
3193     nz  = ai[i + 1] - ai[i];
3194     sum = b[i];
3195     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3196     v += nz;
3197     vi += nz;
3198     x[i] = sum;
3199   }
3200 
3201   /* backward solve the upper triangular */
3202   for (i = n - 1; i >= 0; i--) {
3203     v   = aa + adiag[i + 1] + 1;
3204     vi  = aj + adiag[i + 1] + 1;
3205     nz  = adiag[i] - adiag[i + 1] - 1;
3206     sum = x[i];
3207     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3208     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3209   }
3210 
3211   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3212   PetscCall(VecRestoreArrayRead(bb, &b));
3213   PetscCall(VecRestoreArrayWrite(xx, &x));
3214   PetscFunctionReturn(PETSC_SUCCESS);
3215 }
3216 
3217 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3218 {
3219   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3220   IS                 iscol = a->col, isrow = a->row;
3221   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3222   const PetscInt    *rout, *cout, *r, *c;
3223   PetscScalar       *x, *tmp, sum;
3224   const PetscScalar *b;
3225   const MatScalar   *aa = a->a, *v;
3226 
3227   PetscFunctionBegin;
3228   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3229 
3230   PetscCall(VecGetArrayRead(bb, &b));
3231   PetscCall(VecGetArrayWrite(xx, &x));
3232   tmp = a->solve_work;
3233 
3234   PetscCall(ISGetIndices(isrow, &rout));
3235   r = rout;
3236   PetscCall(ISGetIndices(iscol, &cout));
3237   c = cout;
3238 
3239   /* forward solve the lower triangular */
3240   tmp[0] = b[r[0]];
3241   v      = aa;
3242   vi     = aj;
3243   for (i = 1; i < n; i++) {
3244     nz  = ai[i + 1] - ai[i];
3245     sum = b[r[i]];
3246     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3247     tmp[i] = sum;
3248     v += nz;
3249     vi += nz;
3250   }
3251 
3252   /* backward solve the upper triangular */
3253   for (i = n - 1; i >= 0; i--) {
3254     v   = aa + adiag[i + 1] + 1;
3255     vi  = aj + adiag[i + 1] + 1;
3256     nz  = adiag[i] - adiag[i + 1] - 1;
3257     sum = tmp[i];
3258     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3259     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3260   }
3261 
3262   PetscCall(ISRestoreIndices(isrow, &rout));
3263   PetscCall(ISRestoreIndices(iscol, &cout));
3264   PetscCall(VecRestoreArrayRead(bb, &b));
3265   PetscCall(VecRestoreArrayWrite(xx, &x));
3266   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3267   PetscFunctionReturn(PETSC_SUCCESS);
3268 }
3269 
3270 #if 0
3271 // unused
3272 /*
3273     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3274 */
3275 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3276 {
3277   Mat             B = *fact;
3278   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3279   IS              isicol;
3280   const PetscInt *r, *ic;
3281   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3282   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3283   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3284   PetscInt        nlnk, *lnk;
3285   PetscBT         lnkbt;
3286   PetscBool       row_identity, icol_identity;
3287   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3288   const PetscInt *ics;
3289   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3290   PetscReal       dt = info->dt, shift = info->shiftamount;
3291   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3292   PetscBool       missing;
3293 
3294   PetscFunctionBegin;
3295   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3296   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3297 
3298   /* symbolic factorization, can be reused */
3299   PetscCall(MatMissingDiagonal(A, &missing, &i));
3300   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3301   adiag = a->diag;
3302 
3303   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3304 
3305   /* bdiag is location of diagonal in factor */
3306   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3307   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3308 
3309   /* allocate row pointers bi */
3310   PetscCall(PetscMalloc1(2 * n + 2, &bi));
3311 
3312   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3313   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3314   nnz_max = ai[n] + 2 * n * dtcount + 2;
3315 
3316   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3317   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3318 
3319   /* put together the new matrix */
3320   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3321   b = (Mat_SeqAIJ *)B->data;
3322 
3323   b->free_a       = PETSC_TRUE;
3324   b->free_ij      = PETSC_TRUE;
3325   b->singlemalloc = PETSC_FALSE;
3326 
3327   b->a    = ba;
3328   b->j    = bj;
3329   b->i    = bi;
3330   b->diag = bdiag;
3331   b->ilen = NULL;
3332   b->imax = NULL;
3333   b->row  = isrow;
3334   b->col  = iscol;
3335   PetscCall(PetscObjectReference((PetscObject)isrow));
3336   PetscCall(PetscObjectReference((PetscObject)iscol));
3337   b->icol = isicol;
3338 
3339   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3340   b->maxnz = nnz_max;
3341 
3342   B->factortype            = MAT_FACTOR_ILUDT;
3343   B->info.factor_mallocs   = 0;
3344   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3345   /*  end of symbolic factorization */
3346 
3347   PetscCall(ISGetIndices(isrow, &r));
3348   PetscCall(ISGetIndices(isicol, &ic));
3349   ics = ic;
3350 
3351   /* linked list for storing column indices of the active row */
3352   nlnk = n + 1;
3353   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3354 
3355   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3356   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3357   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3358   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3359   PetscCall(PetscArrayzero(rtmp, n));
3360 
3361   bi[0]         = 0;
3362   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3363   bdiag_rev[n]  = bdiag[0];
3364   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3365   for (i = 0; i < n; i++) {
3366     /* copy initial fill into linked list */
3367     nzi = ai[r[i] + 1] - ai[r[i]];
3368     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3369     nzi_al = adiag[r[i]] - ai[r[i]];
3370     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3371     ajtmp  = aj + ai[r[i]];
3372     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3373 
3374     /* load in initial (unfactored row) */
3375     aatmp = a->a + ai[r[i]];
3376     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3377 
3378     /* add pivot rows into linked list */
3379     row = lnk[n];
3380     while (row < i) {
3381       nzi_bl = bi[row + 1] - bi[row] + 1;
3382       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3383       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3384       nzi += nlnk;
3385       row = lnk[row];
3386     }
3387 
3388     /* copy data from lnk into jtmp, then initialize lnk */
3389     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3390 
3391     /* numerical factorization */
3392     bjtmp = jtmp;
3393     row   = *bjtmp++; /* 1st pivot row */
3394     while (row < i) {
3395       pc         = rtmp + row;
3396       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3397       multiplier = (*pc) * (*pv);
3398       *pc        = multiplier;
3399       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3400         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3401         pv = ba + bdiag[row + 1] + 1;
3402         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3403         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3404         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3405       }
3406       row = *bjtmp++;
3407     }
3408 
3409     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3410     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3411     nzi_bl   = 0;
3412     j        = 0;
3413     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3414       vtmp[j]       = rtmp[jtmp[j]];
3415       rtmp[jtmp[j]] = 0.0;
3416       nzi_bl++;
3417       j++;
3418     }
3419     nzi_bu = nzi - nzi_bl - 1;
3420     while (j < nzi) {
3421       vtmp[j]       = rtmp[jtmp[j]];
3422       rtmp[jtmp[j]] = 0.0;
3423       j++;
3424     }
3425 
3426     bjtmp = bj + bi[i];
3427     batmp = ba + bi[i];
3428     /* apply level dropping rule to L part */
3429     ncut = nzi_al + dtcount;
3430     if (ncut < nzi_bl) {
3431       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3432       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3433     } else {
3434       ncut = nzi_bl;
3435     }
3436     for (j = 0; j < ncut; j++) {
3437       bjtmp[j] = jtmp[j];
3438       batmp[j] = vtmp[j];
3439     }
3440     bi[i + 1] = bi[i] + ncut;
3441     nzi       = ncut + 1;
3442 
3443     /* apply level dropping rule to U part */
3444     ncut = nzi_au + dtcount;
3445     if (ncut < nzi_bu) {
3446       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3447       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3448     } else {
3449       ncut = nzi_bu;
3450     }
3451     nzi += ncut;
3452 
3453     /* mark bdiagonal */
3454     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3455     bdiag_rev[n - i - 1] = bdiag[i + 1];
3456     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3457     bjtmp                = bj + bdiag[i];
3458     batmp                = ba + bdiag[i];
3459     *bjtmp               = i;
3460     *batmp               = diag_tmp; /* rtmp[i]; */
3461     if (*batmp == 0.0) *batmp = dt + shift;
3462     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3463 
3464     bjtmp = bj + bdiag[i + 1] + 1;
3465     batmp = ba + bdiag[i + 1] + 1;
3466     for (k = 0; k < ncut; k++) {
3467       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3468       batmp[k] = vtmp[nzi_bl + 1 + k];
3469     }
3470 
3471     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3472   }              /* for (i=0; i<n; i++) */
3473   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3474 
3475   PetscCall(ISRestoreIndices(isrow, &r));
3476   PetscCall(ISRestoreIndices(isicol, &ic));
3477 
3478   PetscCall(PetscLLDestroy(lnk, lnkbt));
3479   PetscCall(PetscFree2(im, jtmp));
3480   PetscCall(PetscFree2(rtmp, vtmp));
3481   PetscCall(PetscFree(bdiag_rev));
3482 
3483   PetscCall(PetscLogFlops(B->cmap->n));
3484   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3485 
3486   PetscCall(ISIdentity(isrow, &row_identity));
3487   PetscCall(ISIdentity(isicol, &icol_identity));
3488   if (row_identity && icol_identity) {
3489     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3490   } else {
3491     B->ops->solve = MatSolve_SeqAIJ;
3492   }
3493 
3494   B->ops->solveadd          = NULL;
3495   B->ops->solvetranspose    = NULL;
3496   B->ops->solvetransposeadd = NULL;
3497   B->ops->matsolve          = NULL;
3498   B->assembled              = PETSC_TRUE;
3499   B->preallocated           = PETSC_TRUE;
3500   PetscFunctionReturn(PETSC_SUCCESS);
3501 }
3502 
3503 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3504 /*
3505     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3506 */
3507 
3508 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3509 {
3510   PetscFunctionBegin;
3511   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3512   PetscFunctionReturn(PETSC_SUCCESS);
3513 }
3514 
3515 /*
3516    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3517    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3518 */
3519 /*
3520     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3521 */
3522 
3523 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3524 {
3525   Mat             C = fact;
3526   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3527   IS              isrow = b->row, isicol = b->icol;
3528   const PetscInt *r, *ic, *ics;
3529   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3530   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3531   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3532   PetscReal       dt = info->dt, shift = info->shiftamount;
3533   PetscBool       row_identity, col_identity;
3534 
3535   PetscFunctionBegin;
3536   PetscCall(ISGetIndices(isrow, &r));
3537   PetscCall(ISGetIndices(isicol, &ic));
3538   PetscCall(PetscMalloc1(n + 1, &rtmp));
3539   ics = ic;
3540 
3541   for (i = 0; i < n; i++) {
3542     /* initialize rtmp array */
3543     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3544     bjtmp = bj + bi[i];
3545     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3546     rtmp[i] = 0.0;
3547     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3548     bjtmp   = bj + bdiag[i + 1] + 1;
3549     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3550 
3551     /* load in initial unfactored row of A */
3552     nz    = ai[r[i] + 1] - ai[r[i]];
3553     ajtmp = aj + ai[r[i]];
3554     v     = aa + ai[r[i]];
3555     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3556 
3557     /* numerical factorization */
3558     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3559     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3560     k     = 0;
3561     while (k < nzl) {
3562       row        = *bjtmp++;
3563       pc         = rtmp + row;
3564       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3565       multiplier = (*pc) * (*pv);
3566       *pc        = multiplier;
3567       if (PetscAbsScalar(multiplier) > dt) {
3568         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3569         pv = b->a + bdiag[row + 1] + 1;
3570         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3571         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3572         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3573       }
3574       k++;
3575     }
3576 
3577     /* finished row so stick it into b->a */
3578     /* L-part */
3579     pv  = b->a + bi[i];
3580     pj  = bj + bi[i];
3581     nzl = bi[i + 1] - bi[i];
3582     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3583 
3584     /* diagonal: invert diagonal entries for simpler triangular solves */
3585     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3586     b->a[bdiag[i]] = 1.0 / rtmp[i];
3587 
3588     /* U-part */
3589     pv  = b->a + bdiag[i + 1] + 1;
3590     pj  = bj + bdiag[i + 1] + 1;
3591     nzu = bdiag[i] - bdiag[i + 1] - 1;
3592     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3593   }
3594 
3595   PetscCall(PetscFree(rtmp));
3596   PetscCall(ISRestoreIndices(isicol, &ic));
3597   PetscCall(ISRestoreIndices(isrow, &r));
3598 
3599   PetscCall(ISIdentity(isrow, &row_identity));
3600   PetscCall(ISIdentity(isicol, &col_identity));
3601   if (row_identity && col_identity) {
3602     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3603   } else {
3604     C->ops->solve = MatSolve_SeqAIJ;
3605   }
3606   C->ops->solveadd          = NULL;
3607   C->ops->solvetranspose    = NULL;
3608   C->ops->solvetransposeadd = NULL;
3609   C->ops->matsolve          = NULL;
3610   C->assembled              = PETSC_TRUE;
3611   C->preallocated           = PETSC_TRUE;
3612 
3613   PetscCall(PetscLogFlops(C->cmap->n));
3614   PetscFunctionReturn(PETSC_SUCCESS);
3615 }
3616 #endif
3617