xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 6a210b70a61472cf8733db59e8a3fa68f6578c01)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
6 /*
7       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8 
9       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10 */
11 #if 0
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal          best, past = 0, future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i = 0; i < n; i++) {
25     future = 0.0;
26     for (j = ai[i]; j < ai[i + 1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else past = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past / future > best) {
32       best    = past / future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n, &done));
38   PetscCall(PetscArrayzero(done, n));
39   PetscCall(PetscMalloc1(n, &order));
40   order[0] = current;
41   for (i = 0; i < n - 1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j = ai[current]; j < ai[current + 1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k = ai[jj]; k < ai[jj + 1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past / future > best) {
57         best       = past / future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k = 0; k < n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j = ai[k]; j < ai[k + 1]; j++) {
68           kk = aj[j];
69           if (done[kk]) past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past / future > best) {
74           best       = past / future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80     current      = newcurrent;
81     order[i + 1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #endif
91 
92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93 {
94   PetscFunctionBegin;
95   *type = MATSOLVERPETSC;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100 {
101   PetscInt n = A->rmap->n;
102 
103   PetscFunctionBegin;
104 #if defined(PETSC_USE_COMPLEX)
105   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107     *B = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110 #endif
111 
112   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113   PetscCall(MatSetSizes(*B, n, n, n, n));
114   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115     PetscCall(MatSetType(*B, MATSEQAIJ));
116 
117     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
119 
120     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
127 
128     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133   (*B)->factortype = ftype;
134 
135   PetscCall(PetscFree((*B)->solvertype));
136   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137   (*B)->canuseordering = PETSC_TRUE;
138   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 #if 0
143 // currently unused
144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145 {
146   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147   IS                 isicol;
148   const PetscInt    *r, *ic;
149   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150   PetscInt          *bi, *bj, *ajtmp;
151   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152   PetscReal          f;
153   PetscInt           nlnk, *lnk, k, **bi_ptr;
154   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155   PetscBT            lnkbt;
156   PetscBool          missing;
157 
158   PetscFunctionBegin;
159   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160   PetscCall(MatMissingDiagonal(A, &missing, &i));
161   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
162 
163   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164   PetscCall(ISGetIndices(isrow, &r));
165   PetscCall(ISGetIndices(isicol, &ic));
166 
167   /* get new row pointers */
168   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
169   bi[0] = 0;
170 
171   /* bdiag is location of diagonal in factor */
172   PetscCall(PetscMalloc1(n + 1, &bdiag));
173   bdiag[0] = 0;
174 
175   /* linked list for storing column indices of the active row */
176   nlnk = n + 1;
177   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
179 
180   /* initial FreeSpace size is f*(ai[n]+1) */
181   f = info->fill;
182   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
183   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
184   current_space = free_space;
185 
186   for (i = 0; i < n; i++) {
187     /* copy previous fill into linked list */
188     nzi   = 0;
189     nnz   = ai[r[i] + 1] - ai[r[i]];
190     ajtmp = aj + ai[r[i]];
191     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
192     nzi += nlnk;
193 
194     /* add pivot rows into linked list */
195     row = lnk[n];
196     while (row < i) {
197       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
199       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
200       nzi += nlnk;
201       row = lnk[row];
202     }
203     bi[i + 1] = bi[i] + nzi;
204     im[i]     = nzi;
205 
206     /* mark bdiag */
207     nzbd = 0;
208     nnz  = nzi;
209     k    = lnk[n];
210     while (nnz-- && k < i) {
211       nzbd++;
212       k = lnk[k];
213     }
214     bdiag[i] = bi[i] + nzbd;
215 
216     /* if free space is not available, make more free space */
217     if (current_space->local_remaining < nzi) {
218       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
219       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
220       reallocs++;
221     }
222 
223     /* copy data into free space, then initialize lnk */
224     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
225 
226     bi_ptr[i] = current_space->array;
227     current_space->array += nzi;
228     current_space->local_used += nzi;
229     current_space->local_remaining -= nzi;
230   }
231   #if defined(PETSC_USE_INFO)
232   if (ai[n] != 0) {
233     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
234     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
235     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
236     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
237     PetscCall(PetscInfo(A, "for best performance.\n"));
238   } else {
239     PetscCall(PetscInfo(A, "Empty matrix\n"));
240   }
241   #endif
242 
243   PetscCall(ISRestoreIndices(isrow, &r));
244   PetscCall(ISRestoreIndices(isicol, &ic));
245 
246   /* destroy list of free space and other temporary array(s) */
247   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
248   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
249   PetscCall(PetscLLDestroy(lnk, lnkbt));
250   PetscCall(PetscFree2(bi_ptr, im));
251 
252   /* put together the new matrix */
253   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
254   b = (Mat_SeqAIJ *)(B)->data;
255   b->free_ij      = PETSC_TRUE;
256   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
257   b->free_a       = PETSC_TRUE;
258   b->j    = bj;
259   b->i    = bi;
260   b->diag = bdiag;
261   b->ilen = NULL;
262   b->imax = NULL;
263   b->row  = isrow;
264   b->col  = iscol;
265   PetscCall(PetscObjectReference((PetscObject)isrow));
266   PetscCall(PetscObjectReference((PetscObject)iscol));
267   b->icol = isicol;
268   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
269 
270   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271   b->maxnz = b->nz = bi[n];
272 
273   (B)->factortype            = MAT_FACTOR_LU;
274   (B)->info.factor_mallocs   = reallocs;
275   (B)->info.fill_ratio_given = f;
276 
277   if (ai[n]) {
278     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279   } else {
280     (B)->info.fill_ratio_needed = 0.0;
281   }
282   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 #endif
287 
288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289 {
290   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
291   IS                 isicol;
292   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293   PetscInt           i, n = A->rmap->n;
294   PetscInt          *bi, *bj;
295   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296   PetscReal          f;
297   PetscInt           nlnk, *lnk, k, **bi_ptr;
298   PetscFreeSpaceList free_space = NULL, current_space = NULL;
299   PetscBT            lnkbt;
300   PetscBool          missing;
301 
302   PetscFunctionBegin;
303   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304   PetscCall(MatMissingDiagonal(A, &missing, &i));
305   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
306 
307   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308   PetscCall(ISGetIndices(isrow, &r));
309   PetscCall(ISGetIndices(isicol, &ic));
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
313   PetscCall(PetscMalloc1(n + 1, &bdiag));
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
319 
320   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326   current_space = free_space;
327 
328   for (i = 0; i < n; i++) {
329     /* copy previous fill into linked list */
330     nzi   = 0;
331     nnz   = ai[r[i] + 1] - ai[r[i]];
332     ajtmp = aj + ai[r[i]];
333     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i) {
339       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342       nzi += nlnk;
343       row = lnk[row];
344     }
345     bi[i + 1] = bi[i] + nzi;
346     im[i]     = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i) {
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining < nzi) {
360       /* estimated additional space needed */
361       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
363       reallocs++;
364     }
365 
366     /* copy data into free space, then initialize lnk */
367     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
368 
369     bi_ptr[i] = current_space->array;
370     current_space->array += nzi;
371     current_space->local_used += nzi;
372     current_space->local_remaining -= nzi;
373   }
374 
375   PetscCall(ISRestoreIndices(isrow, &r));
376   PetscCall(ISRestoreIndices(isicol, &ic));
377 
378   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
380   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381   PetscCall(PetscLLDestroy(lnk, lnkbt));
382   PetscCall(PetscFree2(bi_ptr, im));
383 
384   /* put together the new matrix */
385   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386   b          = (Mat_SeqAIJ *)(B)->data;
387   b->free_ij = PETSC_TRUE;
388   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
389   b->free_a = PETSC_TRUE;
390   b->j      = bj;
391   b->i      = bi;
392   b->diag   = bdiag;
393   b->ilen   = NULL;
394   b->imax   = NULL;
395   b->row    = isrow;
396   b->col    = iscol;
397   PetscCall(PetscObjectReference((PetscObject)isrow));
398   PetscCall(PetscObjectReference((PetscObject)iscol));
399   b->icol = isicol;
400   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
401 
402   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403   b->maxnz = b->nz = bdiag[0] + 1;
404 
405   B->factortype            = MAT_FACTOR_LU;
406   B->info.factor_mallocs   = reallocs;
407   B->info.fill_ratio_given = f;
408 
409   if (ai[n]) {
410     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
411   } else {
412     B->info.fill_ratio_needed = 0.0;
413   }
414 #if defined(PETSC_USE_INFO)
415   if (ai[n] != 0) {
416     PetscReal af = B->info.fill_ratio_needed;
417     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
418     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
419     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
420     PetscCall(PetscInfo(A, "for best performance.\n"));
421   } else {
422     PetscCall(PetscInfo(A, "Empty matrix\n"));
423   }
424 #endif
425   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
428   PetscFunctionReturn(PETSC_SUCCESS);
429 }
430 
431 /*
432     Trouble in factorization, should we dump the original matrix?
433 */
434 PetscErrorCode MatFactorDumpMatrix(Mat A)
435 {
436   PetscBool flg = PETSC_FALSE;
437 
438   PetscFunctionBegin;
439   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
440   if (flg) {
441     PetscViewer viewer;
442     char        filename[PETSC_MAX_PATH_LEN];
443 
444     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
445     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
446     PetscCall(MatView(A, viewer));
447     PetscCall(PetscViewerDestroy(&viewer));
448   }
449   PetscFunctionReturn(PETSC_SUCCESS);
450 }
451 
452 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453 {
454   Mat              C = B;
455   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456   IS               isrow = b->row, isicol = b->icol;
457   const PetscInt  *r, *ic, *ics;
458   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459   PetscInt         i, j, k, nz, nzL, row, *pj;
460   const PetscInt  *ajtmp, *bjtmp;
461   MatScalar       *rtmp, *pc, multiplier, *pv;
462   const MatScalar *aa = a->a, *v;
463   PetscBool        row_identity, col_identity;
464   FactorShiftCtx   sctx;
465   const PetscInt  *ddiag;
466   PetscReal        rs;
467   MatScalar        d;
468 
469   PetscFunctionBegin;
470   /* MatPivotSetUp(): initialize shift context sctx */
471   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
472 
473   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474     ddiag          = a->diag;
475     sctx.shift_top = info->zeropivot;
476     for (i = 0; i < n; i++) {
477       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478       d  = (aa)[ddiag[i]];
479       rs = -PetscAbsScalar(d) - PetscRealPart(d);
480       v  = aa + ai[i];
481       nz = ai[i + 1] - ai[i];
482       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
483       if (rs > sctx.shift_top) sctx.shift_top = rs;
484     }
485     sctx.shift_top *= 1.1;
486     sctx.nshift_max = 5;
487     sctx.shift_lo   = 0.;
488     sctx.shift_hi   = 1.;
489   }
490 
491   PetscCall(ISGetIndices(isrow, &r));
492   PetscCall(ISGetIndices(isicol, &ic));
493   PetscCall(PetscMalloc1(n + 1, &rtmp));
494   ics = ic;
495 
496   do {
497     sctx.newshift = PETSC_FALSE;
498     for (i = 0; i < n; i++) {
499       /* zero rtmp */
500       /* L part */
501       nz    = bi[i + 1] - bi[i];
502       bjtmp = bj + bi[i];
503       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
504 
505       /* U part */
506       nz    = bdiag[i] - bdiag[i + 1];
507       bjtmp = bj + bdiag[i + 1] + 1;
508       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
509 
510       /* load in initial (unfactored row) */
511       nz    = ai[r[i] + 1] - ai[r[i]];
512       ajtmp = aj + ai[r[i]];
513       v     = aa + ai[r[i]];
514       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
515       /* ZeropivotApply() */
516       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
517 
518       /* elimination */
519       bjtmp = bj + bi[i];
520       row   = *bjtmp++;
521       nzL   = bi[i + 1] - bi[i];
522       for (k = 0; k < nzL; k++) {
523         pc = rtmp + row;
524         if (*pc != 0.0) {
525           pv         = b->a + bdiag[row];
526           multiplier = *pc * (*pv);
527           *pc        = multiplier;
528 
529           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
530           pv = b->a + bdiag[row + 1] + 1;
531           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
532 
533           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
534           PetscCall(PetscLogFlops(1 + 2.0 * nz));
535         }
536         row = *bjtmp++;
537       }
538 
539       /* finished row so stick it into b->a */
540       rs = 0.0;
541       /* L part */
542       pv = b->a + bi[i];
543       pj = b->j + bi[i];
544       nz = bi[i + 1] - bi[i];
545       for (j = 0; j < nz; j++) {
546         pv[j] = rtmp[pj[j]];
547         rs += PetscAbsScalar(pv[j]);
548       }
549 
550       /* U part */
551       pv = b->a + bdiag[i + 1] + 1;
552       pj = b->j + bdiag[i + 1] + 1;
553       nz = bdiag[i] - bdiag[i + 1] - 1;
554       for (j = 0; j < nz; j++) {
555         pv[j] = rtmp[pj[j]];
556         rs += PetscAbsScalar(pv[j]);
557       }
558 
559       sctx.rs = rs;
560       sctx.pv = rtmp[i];
561       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
562       if (sctx.newshift) break; /* break for-loop */
563       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
564 
565       /* Mark diagonal and invert diagonal for simpler triangular solves */
566       pv  = b->a + bdiag[i];
567       *pv = 1.0 / rtmp[i];
568 
569     } /* endof for (i=0; i<n; i++) { */
570 
571     /* MatPivotRefine() */
572     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
573       /*
574        * if no shift in this attempt & shifting & started shifting & can refine,
575        * then try lower shift
576        */
577       sctx.shift_hi       = sctx.shift_fraction;
578       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
579       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
580       sctx.newshift       = PETSC_TRUE;
581       sctx.nshift++;
582     }
583   } while (sctx.newshift);
584 
585   PetscCall(PetscFree(rtmp));
586   PetscCall(ISRestoreIndices(isicol, &ic));
587   PetscCall(ISRestoreIndices(isrow, &r));
588 
589   PetscCall(ISIdentity(isrow, &row_identity));
590   PetscCall(ISIdentity(isicol, &col_identity));
591   if (b->inode.size) {
592     C->ops->solve = MatSolve_SeqAIJ_Inode;
593   } else if (row_identity && col_identity) {
594     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595   } else {
596     C->ops->solve = MatSolve_SeqAIJ;
597   }
598   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
599   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
600   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601   C->ops->matsolve          = MatMatSolve_SeqAIJ;
602   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
603   C->assembled              = PETSC_TRUE;
604   C->preallocated           = PETSC_TRUE;
605 
606   PetscCall(PetscLogFlops(C->cmap->n));
607 
608   /* MatShiftView(A,info,&sctx) */
609   if (sctx.nshift) {
610     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
611       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
612     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
613       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
615       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
616     }
617   }
618   PetscFunctionReturn(PETSC_SUCCESS);
619 }
620 
621 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
622 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
623 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
624 
625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
626 {
627   Mat              C = B;
628   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
629   IS               isrow = b->row, isicol = b->icol;
630   const PetscInt  *r, *ic, *ics;
631   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
632   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
633   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
634   MatScalar       *pv, *rtmp, *pc, multiplier, d;
635   const MatScalar *v, *aa = a->a;
636   PetscReal        rs = 0.0;
637   FactorShiftCtx   sctx;
638   const PetscInt  *ddiag;
639   PetscBool        row_identity, col_identity;
640 
641   PetscFunctionBegin;
642   /* MatPivotSetUp(): initialize shift context sctx */
643   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
644 
645   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
646     ddiag          = a->diag;
647     sctx.shift_top = info->zeropivot;
648     for (i = 0; i < n; i++) {
649       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
650       d  = (aa)[ddiag[i]];
651       rs = -PetscAbsScalar(d) - PetscRealPart(d);
652       v  = aa + ai[i];
653       nz = ai[i + 1] - ai[i];
654       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
655       if (rs > sctx.shift_top) sctx.shift_top = rs;
656     }
657     sctx.shift_top *= 1.1;
658     sctx.nshift_max = 5;
659     sctx.shift_lo   = 0.;
660     sctx.shift_hi   = 1.;
661   }
662 
663   PetscCall(ISGetIndices(isrow, &r));
664   PetscCall(ISGetIndices(isicol, &ic));
665   PetscCall(PetscMalloc1(n + 1, &rtmp));
666   ics = ic;
667 
668   do {
669     sctx.newshift = PETSC_FALSE;
670     for (i = 0; i < n; i++) {
671       nz    = bi[i + 1] - bi[i];
672       bjtmp = bj + bi[i];
673       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
674 
675       /* load in initial (unfactored row) */
676       nz    = ai[r[i] + 1] - ai[r[i]];
677       ajtmp = aj + ai[r[i]];
678       v     = aa + ai[r[i]];
679       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
680       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
681 
682       row = *bjtmp++;
683       while (row < i) {
684         pc = rtmp + row;
685         if (*pc != 0.0) {
686           pv         = b->a + diag_offset[row];
687           pj         = b->j + diag_offset[row] + 1;
688           multiplier = *pc / *pv++;
689           *pc        = multiplier;
690           nz         = bi[row + 1] - diag_offset[row] - 1;
691           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
692           PetscCall(PetscLogFlops(1 + 2.0 * nz));
693         }
694         row = *bjtmp++;
695       }
696       /* finished row so stick it into b->a */
697       pv   = b->a + bi[i];
698       pj   = b->j + bi[i];
699       nz   = bi[i + 1] - bi[i];
700       diag = diag_offset[i] - bi[i];
701       rs   = 0.0;
702       for (j = 0; j < nz; j++) {
703         pv[j] = rtmp[pj[j]];
704         rs += PetscAbsScalar(pv[j]);
705       }
706       rs -= PetscAbsScalar(pv[diag]);
707 
708       sctx.rs = rs;
709       sctx.pv = pv[diag];
710       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
711       if (sctx.newshift) break;
712       pv[diag] = sctx.pv;
713     }
714 
715     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
716       /*
717        * if no shift in this attempt & shifting & started shifting & can refine,
718        * then try lower shift
719        */
720       sctx.shift_hi       = sctx.shift_fraction;
721       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
722       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
723       sctx.newshift       = PETSC_TRUE;
724       sctx.nshift++;
725     }
726   } while (sctx.newshift);
727 
728   /* invert diagonal entries for simpler triangular solves */
729   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
730   PetscCall(PetscFree(rtmp));
731   PetscCall(ISRestoreIndices(isicol, &ic));
732   PetscCall(ISRestoreIndices(isrow, &r));
733 
734   PetscCall(ISIdentity(isrow, &row_identity));
735   PetscCall(ISIdentity(isicol, &col_identity));
736   if (row_identity && col_identity) {
737     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
738   } else {
739     C->ops->solve = MatSolve_SeqAIJ_inplace;
740   }
741   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
742   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
743   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
744   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
745   C->ops->matsolvetranspose = NULL;
746 
747   C->assembled    = PETSC_TRUE;
748   C->preallocated = PETSC_TRUE;
749 
750   PetscCall(PetscLogFlops(C->cmap->n));
751   if (sctx.nshift) {
752     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
753       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
754     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
755       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
756     }
757   }
758   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
759   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
760 
761   PetscCall(MatSeqAIJCheckInode(C));
762   PetscFunctionReturn(PETSC_SUCCESS);
763 }
764 
765 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
766 
767 /*
768    This routine implements inplace ILU(0) with row or/and column permutations.
769    Input:
770      A - original matrix
771    Output;
772      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
773          a->j (col index) is permuted by the inverse of colperm, then sorted
774          a->a reordered accordingly with a->j
775          a->diag (ptr to diagonal elements) is updated.
776 */
777 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
778 {
779   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
780   IS               isrow = a->row, isicol = a->icol;
781   const PetscInt  *r, *ic, *ics;
782   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
783   PetscInt        *ajtmp, nz, row;
784   PetscInt        *diag = a->diag, nbdiag, *pj;
785   PetscScalar     *rtmp, *pc, multiplier, d;
786   MatScalar       *pv, *v;
787   PetscReal        rs;
788   FactorShiftCtx   sctx;
789   const MatScalar *aa = a->a, *vtmp;
790 
791   PetscFunctionBegin;
792   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
793 
794   /* MatPivotSetUp(): initialize shift context sctx */
795   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
796 
797   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
798     const PetscInt *ddiag = a->diag;
799     sctx.shift_top        = info->zeropivot;
800     for (i = 0; i < n; i++) {
801       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
802       d    = (aa)[ddiag[i]];
803       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
804       vtmp = aa + ai[i];
805       nz   = ai[i + 1] - ai[i];
806       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
807       if (rs > sctx.shift_top) sctx.shift_top = rs;
808     }
809     sctx.shift_top *= 1.1;
810     sctx.nshift_max = 5;
811     sctx.shift_lo   = 0.;
812     sctx.shift_hi   = 1.;
813   }
814 
815   PetscCall(ISGetIndices(isrow, &r));
816   PetscCall(ISGetIndices(isicol, &ic));
817   PetscCall(PetscMalloc1(n + 1, &rtmp));
818   PetscCall(PetscArrayzero(rtmp, n + 1));
819   ics = ic;
820 
821 #if defined(MV)
822   sctx.shift_top      = 0.;
823   sctx.nshift_max     = 0;
824   sctx.shift_lo       = 0.;
825   sctx.shift_hi       = 0.;
826   sctx.shift_fraction = 0.;
827 
828   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
829     sctx.shift_top = 0.;
830     for (i = 0; i < n; i++) {
831       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
832       d  = (a->a)[diag[i]];
833       rs = -PetscAbsScalar(d) - PetscRealPart(d);
834       v  = a->a + ai[i];
835       nz = ai[i + 1] - ai[i];
836       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
837       if (rs > sctx.shift_top) sctx.shift_top = rs;
838     }
839     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
840     sctx.shift_top *= 1.1;
841     sctx.nshift_max = 5;
842     sctx.shift_lo   = 0.;
843     sctx.shift_hi   = 1.;
844   }
845 
846   sctx.shift_amount = 0.;
847   sctx.nshift       = 0;
848 #endif
849 
850   do {
851     sctx.newshift = PETSC_FALSE;
852     for (i = 0; i < n; i++) {
853       /* load in initial unfactored row */
854       nz    = ai[r[i] + 1] - ai[r[i]];
855       ajtmp = aj + ai[r[i]];
856       v     = a->a + ai[r[i]];
857       /* sort permuted ajtmp and values v accordingly */
858       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
859       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
860 
861       diag[r[i]] = ai[r[i]];
862       for (j = 0; j < nz; j++) {
863         rtmp[ajtmp[j]] = v[j];
864         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
865       }
866       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
867 
868       row = *ajtmp++;
869       while (row < i) {
870         pc = rtmp + row;
871         if (*pc != 0.0) {
872           pv = a->a + diag[r[row]];
873           pj = aj + diag[r[row]] + 1;
874 
875           multiplier = *pc / *pv++;
876           *pc        = multiplier;
877           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
878           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
879           PetscCall(PetscLogFlops(1 + 2.0 * nz));
880         }
881         row = *ajtmp++;
882       }
883       /* finished row so overwrite it onto a->a */
884       pv     = a->a + ai[r[i]];
885       pj     = aj + ai[r[i]];
886       nz     = ai[r[i] + 1] - ai[r[i]];
887       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
888 
889       rs = 0.0;
890       for (j = 0; j < nz; j++) {
891         pv[j] = rtmp[pj[j]];
892         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
893       }
894 
895       sctx.rs = rs;
896       sctx.pv = pv[nbdiag];
897       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
898       if (sctx.newshift) break;
899       pv[nbdiag] = sctx.pv;
900     }
901 
902     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
903       /*
904        * if no shift in this attempt & shifting & started shifting & can refine,
905        * then try lower shift
906        */
907       sctx.shift_hi       = sctx.shift_fraction;
908       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
909       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
910       sctx.newshift       = PETSC_TRUE;
911       sctx.nshift++;
912     }
913   } while (sctx.newshift);
914 
915   /* invert diagonal entries for simpler triangular solves */
916   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
917 
918   PetscCall(PetscFree(rtmp));
919   PetscCall(ISRestoreIndices(isicol, &ic));
920   PetscCall(ISRestoreIndices(isrow, &r));
921 
922   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
923   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
924   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
925   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
926 
927   A->assembled    = PETSC_TRUE;
928   A->preallocated = PETSC_TRUE;
929 
930   PetscCall(PetscLogFlops(A->cmap->n));
931   if (sctx.nshift) {
932     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
933       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
934     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
935       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
936     }
937   }
938   PetscFunctionReturn(PETSC_SUCCESS);
939 }
940 
941 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
942 {
943   Mat C;
944 
945   PetscFunctionBegin;
946   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
947   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
948   PetscCall(MatLUFactorNumeric(C, A, info));
949 
950   A->ops->solve          = C->ops->solve;
951   A->ops->solvetranspose = C->ops->solvetranspose;
952 
953   PetscCall(MatHeaderMerge(A, &C));
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
957 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
958 {
959   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
960   IS                 iscol = a->col, isrow = a->row;
961   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
962   PetscInt           nz;
963   const PetscInt    *rout, *cout, *r, *c;
964   PetscScalar       *x, *tmp, *tmps, sum;
965   const PetscScalar *b;
966   const MatScalar   *aa = a->a, *v;
967 
968   PetscFunctionBegin;
969   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
970 
971   PetscCall(VecGetArrayRead(bb, &b));
972   PetscCall(VecGetArrayWrite(xx, &x));
973   tmp = a->solve_work;
974 
975   PetscCall(ISGetIndices(isrow, &rout));
976   r = rout;
977   PetscCall(ISGetIndices(iscol, &cout));
978   c = cout + (n - 1);
979 
980   /* forward solve the lower triangular */
981   tmp[0] = b[*r++];
982   tmps   = tmp;
983   for (i = 1; i < n; i++) {
984     v   = aa + ai[i];
985     vi  = aj + ai[i];
986     nz  = a->diag[i] - ai[i];
987     sum = b[*r++];
988     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
989     tmp[i] = sum;
990   }
991 
992   /* backward solve the upper triangular */
993   for (i = n - 1; i >= 0; i--) {
994     v   = aa + a->diag[i] + 1;
995     vi  = aj + a->diag[i] + 1;
996     nz  = ai[i + 1] - a->diag[i] - 1;
997     sum = tmp[i];
998     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
999     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1000   }
1001 
1002   PetscCall(ISRestoreIndices(isrow, &rout));
1003   PetscCall(ISRestoreIndices(iscol, &cout));
1004   PetscCall(VecRestoreArrayRead(bb, &b));
1005   PetscCall(VecRestoreArrayWrite(xx, &x));
1006   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1007   PetscFunctionReturn(PETSC_SUCCESS);
1008 }
1009 
1010 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1011 {
1012   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1013   IS                 iscol = a->col, isrow = a->row;
1014   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1015   PetscInt           nz, neq, ldb, ldx;
1016   const PetscInt    *rout, *cout, *r, *c;
1017   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1018   const PetscScalar *b, *aa  = a->a, *v;
1019   PetscBool          isdense;
1020 
1021   PetscFunctionBegin;
1022   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1023   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1024   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1025   if (X != B) {
1026     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1027     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1028   }
1029   PetscCall(MatDenseGetArrayRead(B, &b));
1030   PetscCall(MatDenseGetLDA(B, &ldb));
1031   PetscCall(MatDenseGetArray(X, &x));
1032   PetscCall(MatDenseGetLDA(X, &ldx));
1033   PetscCall(ISGetIndices(isrow, &rout));
1034   r = rout;
1035   PetscCall(ISGetIndices(iscol, &cout));
1036   c = cout;
1037   for (neq = 0; neq < B->cmap->n; neq++) {
1038     /* forward solve the lower triangular */
1039     tmp[0] = b[r[0]];
1040     tmps   = tmp;
1041     for (i = 1; i < n; i++) {
1042       v   = aa + ai[i];
1043       vi  = aj + ai[i];
1044       nz  = a->diag[i] - ai[i];
1045       sum = b[r[i]];
1046       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1047       tmp[i] = sum;
1048     }
1049     /* backward solve the upper triangular */
1050     for (i = n - 1; i >= 0; i--) {
1051       v   = aa + a->diag[i] + 1;
1052       vi  = aj + a->diag[i] + 1;
1053       nz  = ai[i + 1] - a->diag[i] - 1;
1054       sum = tmp[i];
1055       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1056       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1057     }
1058     b += ldb;
1059     x += ldx;
1060   }
1061   PetscCall(ISRestoreIndices(isrow, &rout));
1062   PetscCall(ISRestoreIndices(iscol, &cout));
1063   PetscCall(MatDenseRestoreArrayRead(B, &b));
1064   PetscCall(MatDenseRestoreArray(X, &x));
1065   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1066   PetscFunctionReturn(PETSC_SUCCESS);
1067 }
1068 
1069 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1070 {
1071   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1072   IS                 iscol = a->col, isrow = a->row;
1073   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1074   PetscInt           nz, neq, ldb, ldx;
1075   const PetscInt    *rout, *cout, *r, *c;
1076   PetscScalar       *x, *tmp = a->solve_work, sum;
1077   const PetscScalar *b, *aa  = a->a, *v;
1078   PetscBool          isdense;
1079 
1080   PetscFunctionBegin;
1081   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1082   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1083   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1084   if (X != B) {
1085     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1086     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1087   }
1088   PetscCall(MatDenseGetArrayRead(B, &b));
1089   PetscCall(MatDenseGetLDA(B, &ldb));
1090   PetscCall(MatDenseGetArray(X, &x));
1091   PetscCall(MatDenseGetLDA(X, &ldx));
1092   PetscCall(ISGetIndices(isrow, &rout));
1093   r = rout;
1094   PetscCall(ISGetIndices(iscol, &cout));
1095   c = cout;
1096   for (neq = 0; neq < B->cmap->n; neq++) {
1097     /* forward solve the lower triangular */
1098     tmp[0] = b[r[0]];
1099     v      = aa;
1100     vi     = aj;
1101     for (i = 1; i < n; i++) {
1102       nz  = ai[i + 1] - ai[i];
1103       sum = b[r[i]];
1104       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1105       tmp[i] = sum;
1106       v += nz;
1107       vi += nz;
1108     }
1109     /* backward solve the upper triangular */
1110     for (i = n - 1; i >= 0; i--) {
1111       v   = aa + adiag[i + 1] + 1;
1112       vi  = aj + adiag[i + 1] + 1;
1113       nz  = adiag[i] - adiag[i + 1] - 1;
1114       sum = tmp[i];
1115       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1116       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1117     }
1118     b += ldb;
1119     x += ldx;
1120   }
1121   PetscCall(ISRestoreIndices(isrow, &rout));
1122   PetscCall(ISRestoreIndices(iscol, &cout));
1123   PetscCall(MatDenseRestoreArrayRead(B, &b));
1124   PetscCall(MatDenseRestoreArray(X, &x));
1125   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128 
1129 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1130 {
1131   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1132   IS                 iscol = a->col, isrow = a->row;
1133   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1134   PetscInt           nz, neq, ldb, ldx;
1135   const PetscInt    *rout, *cout, *r, *c;
1136   PetscScalar       *x, *tmp = a->solve_work, s1;
1137   const PetscScalar *b, *aa  = a->a, *v;
1138   PetscBool          isdense;
1139 
1140   PetscFunctionBegin;
1141   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1142   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1143   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1144   if (X != B) {
1145     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1146     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1147   }
1148   PetscCall(MatDenseGetArrayRead(B, &b));
1149   PetscCall(MatDenseGetLDA(B, &ldb));
1150   PetscCall(MatDenseGetArray(X, &x));
1151   PetscCall(MatDenseGetLDA(X, &ldx));
1152   tmp = a->solve_work;
1153   PetscCall(ISGetIndices(isrow, &rout));
1154   r = rout;
1155   PetscCall(ISGetIndices(iscol, &cout));
1156   c = cout;
1157   for (neq = 0; neq < B->cmap->n; neq++) {
1158     /* copy the b into temp work space according to permutation */
1159     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1160 
1161     /* forward solve the U^T */
1162     for (i = 0; i < n; i++) {
1163       v  = aa + adiag[i + 1] + 1;
1164       vi = aj + adiag[i + 1] + 1;
1165       nz = adiag[i] - adiag[i + 1] - 1;
1166       s1 = tmp[i];
1167       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1168       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1169       tmp[i] = s1;
1170     }
1171 
1172     /* backward solve the L^T */
1173     for (i = n - 1; i >= 0; i--) {
1174       v  = aa + ai[i];
1175       vi = aj + ai[i];
1176       nz = ai[i + 1] - ai[i];
1177       s1 = tmp[i];
1178       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1179     }
1180 
1181     /* copy tmp into x according to permutation */
1182     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1183     b += ldb;
1184     x += ldx;
1185   }
1186   PetscCall(ISRestoreIndices(isrow, &rout));
1187   PetscCall(ISRestoreIndices(iscol, &cout));
1188   PetscCall(MatDenseRestoreArrayRead(B, &b));
1189   PetscCall(MatDenseRestoreArray(X, &x));
1190   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1191   PetscFunctionReturn(PETSC_SUCCESS);
1192 }
1193 
1194 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1195 {
1196   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1197   IS                 iscol = a->col, isrow = a->row;
1198   const PetscInt    *r, *c, *rout, *cout;
1199   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1200   PetscInt           nz, row;
1201   PetscScalar       *x, *tmp, *tmps, sum;
1202   const PetscScalar *b;
1203   const MatScalar   *aa = a->a, *v;
1204 
1205   PetscFunctionBegin;
1206   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1207 
1208   PetscCall(VecGetArrayRead(bb, &b));
1209   PetscCall(VecGetArrayWrite(xx, &x));
1210   tmp = a->solve_work;
1211 
1212   PetscCall(ISGetIndices(isrow, &rout));
1213   r = rout;
1214   PetscCall(ISGetIndices(iscol, &cout));
1215   c = cout + (n - 1);
1216 
1217   /* forward solve the lower triangular */
1218   tmp[0] = b[*r++];
1219   tmps   = tmp;
1220   for (row = 1; row < n; row++) {
1221     i   = rout[row]; /* permuted row */
1222     v   = aa + ai[i];
1223     vi  = aj + ai[i];
1224     nz  = a->diag[i] - ai[i];
1225     sum = b[*r++];
1226     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1227     tmp[row] = sum;
1228   }
1229 
1230   /* backward solve the upper triangular */
1231   for (row = n - 1; row >= 0; row--) {
1232     i   = rout[row]; /* permuted row */
1233     v   = aa + a->diag[i] + 1;
1234     vi  = aj + a->diag[i] + 1;
1235     nz  = ai[i + 1] - a->diag[i] - 1;
1236     sum = tmp[row];
1237     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1238     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1239   }
1240 
1241   PetscCall(ISRestoreIndices(isrow, &rout));
1242   PetscCall(ISRestoreIndices(iscol, &cout));
1243   PetscCall(VecRestoreArrayRead(bb, &b));
1244   PetscCall(VecRestoreArrayWrite(xx, &x));
1245   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1246   PetscFunctionReturn(PETSC_SUCCESS);
1247 }
1248 
1249 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1250 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1251 {
1252   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1253   PetscInt           n  = A->rmap->n;
1254   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1255   PetscScalar       *x;
1256   const PetscScalar *b;
1257   const MatScalar   *aa = a->a;
1258 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1259   PetscInt         adiag_i, i, nz, ai_i;
1260   const PetscInt  *vi;
1261   const MatScalar *v;
1262   PetscScalar      sum;
1263 #endif
1264 
1265   PetscFunctionBegin;
1266   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1267 
1268   PetscCall(VecGetArrayRead(bb, &b));
1269   PetscCall(VecGetArrayWrite(xx, &x));
1270 
1271 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1272   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1273 #else
1274   /* forward solve the lower triangular */
1275   x[0] = b[0];
1276   for (i = 1; i < n; i++) {
1277     ai_i = ai[i];
1278     v    = aa + ai_i;
1279     vi   = aj + ai_i;
1280     nz   = adiag[i] - ai_i;
1281     sum  = b[i];
1282     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1283     x[i] = sum;
1284   }
1285 
1286   /* backward solve the upper triangular */
1287   for (i = n - 1; i >= 0; i--) {
1288     adiag_i = adiag[i];
1289     v       = aa + adiag_i + 1;
1290     vi      = aj + adiag_i + 1;
1291     nz      = ai[i + 1] - adiag_i - 1;
1292     sum     = x[i];
1293     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1294     x[i] = sum * aa[adiag_i];
1295   }
1296 #endif
1297   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1298   PetscCall(VecRestoreArrayRead(bb, &b));
1299   PetscCall(VecRestoreArrayWrite(xx, &x));
1300   PetscFunctionReturn(PETSC_SUCCESS);
1301 }
1302 
1303 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1304 {
1305   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1306   IS                 iscol = a->col, isrow = a->row;
1307   PetscInt           i, n                  = A->rmap->n, j;
1308   PetscInt           nz;
1309   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1310   PetscScalar       *x, *tmp, sum;
1311   const PetscScalar *b;
1312   const MatScalar   *aa = a->a, *v;
1313 
1314   PetscFunctionBegin;
1315   if (yy != xx) PetscCall(VecCopy(yy, xx));
1316 
1317   PetscCall(VecGetArrayRead(bb, &b));
1318   PetscCall(VecGetArray(xx, &x));
1319   tmp = a->solve_work;
1320 
1321   PetscCall(ISGetIndices(isrow, &rout));
1322   r = rout;
1323   PetscCall(ISGetIndices(iscol, &cout));
1324   c = cout + (n - 1);
1325 
1326   /* forward solve the lower triangular */
1327   tmp[0] = b[*r++];
1328   for (i = 1; i < n; i++) {
1329     v   = aa + ai[i];
1330     vi  = aj + ai[i];
1331     nz  = a->diag[i] - ai[i];
1332     sum = b[*r++];
1333     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1334     tmp[i] = sum;
1335   }
1336 
1337   /* backward solve the upper triangular */
1338   for (i = n - 1; i >= 0; i--) {
1339     v   = aa + a->diag[i] + 1;
1340     vi  = aj + a->diag[i] + 1;
1341     nz  = ai[i + 1] - a->diag[i] - 1;
1342     sum = tmp[i];
1343     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1344     tmp[i] = sum * aa[a->diag[i]];
1345     x[*c--] += tmp[i];
1346   }
1347 
1348   PetscCall(ISRestoreIndices(isrow, &rout));
1349   PetscCall(ISRestoreIndices(iscol, &cout));
1350   PetscCall(VecRestoreArrayRead(bb, &b));
1351   PetscCall(VecRestoreArray(xx, &x));
1352   PetscCall(PetscLogFlops(2.0 * a->nz));
1353   PetscFunctionReturn(PETSC_SUCCESS);
1354 }
1355 
1356 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1357 {
1358   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1359   IS                 iscol = a->col, isrow = a->row;
1360   PetscInt           i, n                  = A->rmap->n, j;
1361   PetscInt           nz;
1362   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1363   PetscScalar       *x, *tmp, sum;
1364   const PetscScalar *b;
1365   const MatScalar   *aa = a->a, *v;
1366 
1367   PetscFunctionBegin;
1368   if (yy != xx) PetscCall(VecCopy(yy, xx));
1369 
1370   PetscCall(VecGetArrayRead(bb, &b));
1371   PetscCall(VecGetArray(xx, &x));
1372   tmp = a->solve_work;
1373 
1374   PetscCall(ISGetIndices(isrow, &rout));
1375   r = rout;
1376   PetscCall(ISGetIndices(iscol, &cout));
1377   c = cout;
1378 
1379   /* forward solve the lower triangular */
1380   tmp[0] = b[r[0]];
1381   v      = aa;
1382   vi     = aj;
1383   for (i = 1; i < n; i++) {
1384     nz  = ai[i + 1] - ai[i];
1385     sum = b[r[i]];
1386     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1387     tmp[i] = sum;
1388     v += nz;
1389     vi += nz;
1390   }
1391 
1392   /* backward solve the upper triangular */
1393   v  = aa + adiag[n - 1];
1394   vi = aj + adiag[n - 1];
1395   for (i = n - 1; i >= 0; i--) {
1396     nz  = adiag[i] - adiag[i + 1] - 1;
1397     sum = tmp[i];
1398     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1399     tmp[i] = sum * v[nz];
1400     x[c[i]] += tmp[i];
1401     v += nz + 1;
1402     vi += nz + 1;
1403   }
1404 
1405   PetscCall(ISRestoreIndices(isrow, &rout));
1406   PetscCall(ISRestoreIndices(iscol, &cout));
1407   PetscCall(VecRestoreArrayRead(bb, &b));
1408   PetscCall(VecRestoreArray(xx, &x));
1409   PetscCall(PetscLogFlops(2.0 * a->nz));
1410   PetscFunctionReturn(PETSC_SUCCESS);
1411 }
1412 
1413 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1414 {
1415   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1416   IS                 iscol = a->col, isrow = a->row;
1417   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1418   PetscInt           i, n = A->rmap->n, j;
1419   PetscInt           nz;
1420   PetscScalar       *x, *tmp, s1;
1421   const MatScalar   *aa = a->a, *v;
1422   const PetscScalar *b;
1423 
1424   PetscFunctionBegin;
1425   PetscCall(VecGetArrayRead(bb, &b));
1426   PetscCall(VecGetArrayWrite(xx, &x));
1427   tmp = a->solve_work;
1428 
1429   PetscCall(ISGetIndices(isrow, &rout));
1430   r = rout;
1431   PetscCall(ISGetIndices(iscol, &cout));
1432   c = cout;
1433 
1434   /* copy the b into temp work space according to permutation */
1435   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1436 
1437   /* forward solve the U^T */
1438   for (i = 0; i < n; i++) {
1439     v  = aa + diag[i];
1440     vi = aj + diag[i] + 1;
1441     nz = ai[i + 1] - diag[i] - 1;
1442     s1 = tmp[i];
1443     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1444     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1445     tmp[i] = s1;
1446   }
1447 
1448   /* backward solve the L^T */
1449   for (i = n - 1; i >= 0; i--) {
1450     v  = aa + diag[i] - 1;
1451     vi = aj + diag[i] - 1;
1452     nz = diag[i] - ai[i];
1453     s1 = tmp[i];
1454     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1455   }
1456 
1457   /* copy tmp into x according to permutation */
1458   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1459 
1460   PetscCall(ISRestoreIndices(isrow, &rout));
1461   PetscCall(ISRestoreIndices(iscol, &cout));
1462   PetscCall(VecRestoreArrayRead(bb, &b));
1463   PetscCall(VecRestoreArrayWrite(xx, &x));
1464 
1465   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1466   PetscFunctionReturn(PETSC_SUCCESS);
1467 }
1468 
1469 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1470 {
1471   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1472   IS                 iscol = a->col, isrow = a->row;
1473   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1474   PetscInt           i, n = A->rmap->n, j;
1475   PetscInt           nz;
1476   PetscScalar       *x, *tmp, s1;
1477   const MatScalar   *aa = a->a, *v;
1478   const PetscScalar *b;
1479 
1480   PetscFunctionBegin;
1481   PetscCall(VecGetArrayRead(bb, &b));
1482   PetscCall(VecGetArrayWrite(xx, &x));
1483   tmp = a->solve_work;
1484 
1485   PetscCall(ISGetIndices(isrow, &rout));
1486   r = rout;
1487   PetscCall(ISGetIndices(iscol, &cout));
1488   c = cout;
1489 
1490   /* copy the b into temp work space according to permutation */
1491   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1492 
1493   /* forward solve the U^T */
1494   for (i = 0; i < n; i++) {
1495     v  = aa + adiag[i + 1] + 1;
1496     vi = aj + adiag[i + 1] + 1;
1497     nz = adiag[i] - adiag[i + 1] - 1;
1498     s1 = tmp[i];
1499     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1500     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1501     tmp[i] = s1;
1502   }
1503 
1504   /* backward solve the L^T */
1505   for (i = n - 1; i >= 0; i--) {
1506     v  = aa + ai[i];
1507     vi = aj + ai[i];
1508     nz = ai[i + 1] - ai[i];
1509     s1 = tmp[i];
1510     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1511   }
1512 
1513   /* copy tmp into x according to permutation */
1514   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1515 
1516   PetscCall(ISRestoreIndices(isrow, &rout));
1517   PetscCall(ISRestoreIndices(iscol, &cout));
1518   PetscCall(VecRestoreArrayRead(bb, &b));
1519   PetscCall(VecRestoreArrayWrite(xx, &x));
1520 
1521   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1522   PetscFunctionReturn(PETSC_SUCCESS);
1523 }
1524 
1525 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1526 {
1527   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1528   IS                 iscol = a->col, isrow = a->row;
1529   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1530   PetscInt           i, n = A->rmap->n, j;
1531   PetscInt           nz;
1532   PetscScalar       *x, *tmp, s1;
1533   const MatScalar   *aa = a->a, *v;
1534   const PetscScalar *b;
1535 
1536   PetscFunctionBegin;
1537   if (zz != xx) PetscCall(VecCopy(zz, xx));
1538   PetscCall(VecGetArrayRead(bb, &b));
1539   PetscCall(VecGetArray(xx, &x));
1540   tmp = a->solve_work;
1541 
1542   PetscCall(ISGetIndices(isrow, &rout));
1543   r = rout;
1544   PetscCall(ISGetIndices(iscol, &cout));
1545   c = cout;
1546 
1547   /* copy the b into temp work space according to permutation */
1548   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1549 
1550   /* forward solve the U^T */
1551   for (i = 0; i < n; i++) {
1552     v  = aa + diag[i];
1553     vi = aj + diag[i] + 1;
1554     nz = ai[i + 1] - diag[i] - 1;
1555     s1 = tmp[i];
1556     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1557     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1558     tmp[i] = s1;
1559   }
1560 
1561   /* backward solve the L^T */
1562   for (i = n - 1; i >= 0; i--) {
1563     v  = aa + diag[i] - 1;
1564     vi = aj + diag[i] - 1;
1565     nz = diag[i] - ai[i];
1566     s1 = tmp[i];
1567     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1568   }
1569 
1570   /* copy tmp into x according to permutation */
1571   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1572 
1573   PetscCall(ISRestoreIndices(isrow, &rout));
1574   PetscCall(ISRestoreIndices(iscol, &cout));
1575   PetscCall(VecRestoreArrayRead(bb, &b));
1576   PetscCall(VecRestoreArray(xx, &x));
1577 
1578   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1579   PetscFunctionReturn(PETSC_SUCCESS);
1580 }
1581 
1582 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1583 {
1584   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1585   IS                 iscol = a->col, isrow = a->row;
1586   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1587   PetscInt           i, n = A->rmap->n, j;
1588   PetscInt           nz;
1589   PetscScalar       *x, *tmp, s1;
1590   const MatScalar   *aa = a->a, *v;
1591   const PetscScalar *b;
1592 
1593   PetscFunctionBegin;
1594   if (zz != xx) PetscCall(VecCopy(zz, xx));
1595   PetscCall(VecGetArrayRead(bb, &b));
1596   PetscCall(VecGetArray(xx, &x));
1597   tmp = a->solve_work;
1598 
1599   PetscCall(ISGetIndices(isrow, &rout));
1600   r = rout;
1601   PetscCall(ISGetIndices(iscol, &cout));
1602   c = cout;
1603 
1604   /* copy the b into temp work space according to permutation */
1605   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1606 
1607   /* forward solve the U^T */
1608   for (i = 0; i < n; i++) {
1609     v  = aa + adiag[i + 1] + 1;
1610     vi = aj + adiag[i + 1] + 1;
1611     nz = adiag[i] - adiag[i + 1] - 1;
1612     s1 = tmp[i];
1613     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1614     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1615     tmp[i] = s1;
1616   }
1617 
1618   /* backward solve the L^T */
1619   for (i = n - 1; i >= 0; i--) {
1620     v  = aa + ai[i];
1621     vi = aj + ai[i];
1622     nz = ai[i + 1] - ai[i];
1623     s1 = tmp[i];
1624     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1625   }
1626 
1627   /* copy tmp into x according to permutation */
1628   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1629 
1630   PetscCall(ISRestoreIndices(isrow, &rout));
1631   PetscCall(ISRestoreIndices(iscol, &cout));
1632   PetscCall(VecRestoreArrayRead(bb, &b));
1633   PetscCall(VecRestoreArray(xx, &x));
1634 
1635   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1636   PetscFunctionReturn(PETSC_SUCCESS);
1637 }
1638 
1639 /*
1640    ilu() under revised new data structure.
1641    Factored arrays bj and ba are stored as
1642      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1643 
1644    bi=fact->i is an array of size n+1, in which
1645      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1646      bi[n]:  points to L(n-1,n-1)+1
1647 
1648   bdiag=fact->diag is an array of size n+1,in which
1649      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1650      bdiag[n]: points to entry of U(n-1,0)-1
1651 
1652    U(i,:) contains bdiag[i] as its last entry, i.e.,
1653     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1654 */
1655 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1656 {
1657   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1658   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1659   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1660   IS             isicol;
1661 
1662   PetscFunctionBegin;
1663   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1664   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1665   b = (Mat_SeqAIJ *)(fact)->data;
1666 
1667   /* allocate matrix arrays for new data structure */
1668   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a));
1669   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j));
1670   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1671   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1672   b->free_a  = PETSC_TRUE;
1673   b->free_ij = PETSC_TRUE;
1674 
1675   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1676   bdiag = b->diag;
1677 
1678   /* set bi and bj with new data structure */
1679   bi = b->i;
1680   bj = b->j;
1681 
1682   /* L part */
1683   bi[0] = 0;
1684   for (i = 0; i < n; i++) {
1685     nz        = adiag[i] - ai[i];
1686     bi[i + 1] = bi[i] + nz;
1687     aj        = a->j + ai[i];
1688     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1689   }
1690 
1691   /* U part */
1692   bdiag[n] = bi[n] - 1;
1693   for (i = n - 1; i >= 0; i--) {
1694     nz = ai[i + 1] - adiag[i] - 1;
1695     aj = a->j + adiag[i] + 1;
1696     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1697     /* diag[i] */
1698     bj[k++]  = i;
1699     bdiag[i] = bdiag[i + 1] + nz + 1;
1700   }
1701 
1702   fact->factortype             = MAT_FACTOR_ILU;
1703   fact->info.factor_mallocs    = 0;
1704   fact->info.fill_ratio_given  = info->fill;
1705   fact->info.fill_ratio_needed = 1.0;
1706   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1707   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1708 
1709   b       = (Mat_SeqAIJ *)(fact)->data;
1710   b->row  = isrow;
1711   b->col  = iscol;
1712   b->icol = isicol;
1713   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1714   PetscCall(PetscObjectReference((PetscObject)isrow));
1715   PetscCall(PetscObjectReference((PetscObject)iscol));
1716   PetscFunctionReturn(PETSC_SUCCESS);
1717 }
1718 
1719 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1720 {
1721   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1722   IS                 isicol;
1723   const PetscInt    *r, *ic;
1724   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1725   PetscInt          *bi, *cols, nnz, *cols_lvl;
1726   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1727   PetscInt           i, levels, diagonal_fill;
1728   PetscBool          col_identity, row_identity, missing;
1729   PetscReal          f;
1730   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1731   PetscBT            lnkbt;
1732   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1733   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1734   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1735 
1736   PetscFunctionBegin;
1737   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1738   PetscCall(MatMissingDiagonal(A, &missing, &i));
1739   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1740 
1741   levels = (PetscInt)info->levels;
1742   PetscCall(ISIdentity(isrow, &row_identity));
1743   PetscCall(ISIdentity(iscol, &col_identity));
1744   if (!levels && row_identity && col_identity) {
1745     /* special case: ilu(0) with natural ordering */
1746     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1747     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1748     PetscFunctionReturn(PETSC_SUCCESS);
1749   }
1750 
1751   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1752   PetscCall(ISGetIndices(isrow, &r));
1753   PetscCall(ISGetIndices(isicol, &ic));
1754 
1755   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1756   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1757   PetscCall(PetscMalloc1(n + 1, &bdiag));
1758   bi[0] = bdiag[0] = 0;
1759   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1760 
1761   /* create a linked list for storing column indices of the active row */
1762   nlnk = n + 1;
1763   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1764 
1765   /* initial FreeSpace size is f*(ai[n]+1) */
1766   f             = info->fill;
1767   diagonal_fill = (PetscInt)info->diagonal_fill;
1768   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1769   current_space = free_space;
1770   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1771   current_space_lvl = free_space_lvl;
1772   for (i = 0; i < n; i++) {
1773     nzi = 0;
1774     /* copy current row into linked list */
1775     nnz = ai[r[i] + 1] - ai[r[i]];
1776     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1777     cols   = aj + ai[r[i]];
1778     lnk[i] = -1; /* marker to indicate if diagonal exists */
1779     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1780     nzi += nlnk;
1781 
1782     /* make sure diagonal entry is included */
1783     if (diagonal_fill && lnk[i] == -1) {
1784       fm = n;
1785       while (lnk[fm] < i) fm = lnk[fm];
1786       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1787       lnk[fm]    = i;
1788       lnk_lvl[i] = 0;
1789       nzi++;
1790       dcount++;
1791     }
1792 
1793     /* add pivot rows into the active row */
1794     nzbd = 0;
1795     prow = lnk[n];
1796     while (prow < i) {
1797       nnz      = bdiag[prow];
1798       cols     = bj_ptr[prow] + nnz + 1;
1799       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1800       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1801       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1802       nzi += nlnk;
1803       prow = lnk[prow];
1804       nzbd++;
1805     }
1806     bdiag[i]  = nzbd;
1807     bi[i + 1] = bi[i] + nzi;
1808     /* if free space is not available, make more free space */
1809     if (current_space->local_remaining < nzi) {
1810       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1811       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1812       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1813       reallocs++;
1814     }
1815 
1816     /* copy data into free_space and free_space_lvl, then initialize lnk */
1817     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1818     bj_ptr[i]    = current_space->array;
1819     bjlvl_ptr[i] = current_space_lvl->array;
1820 
1821     /* make sure the active row i has diagonal entry */
1822     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1823 
1824     current_space->array += nzi;
1825     current_space->local_used += nzi;
1826     current_space->local_remaining -= nzi;
1827     current_space_lvl->array += nzi;
1828     current_space_lvl->local_used += nzi;
1829     current_space_lvl->local_remaining -= nzi;
1830   }
1831 
1832   PetscCall(ISRestoreIndices(isrow, &r));
1833   PetscCall(ISRestoreIndices(isicol, &ic));
1834   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1835   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
1836   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1837 
1838   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1839   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1840   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1841 
1842 #if defined(PETSC_USE_INFO)
1843   {
1844     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1845     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1846     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1847     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1848     PetscCall(PetscInfo(A, "for best performance.\n"));
1849     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1850   }
1851 #endif
1852   /* put together the new matrix */
1853   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1854   b          = (Mat_SeqAIJ *)(fact)->data;
1855   b->free_ij = PETSC_TRUE;
1856   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1857   b->free_a = PETSC_TRUE;
1858   b->j      = bj;
1859   b->i      = bi;
1860   b->diag   = bdiag;
1861   b->ilen   = NULL;
1862   b->imax   = NULL;
1863   b->row    = isrow;
1864   b->col    = iscol;
1865   PetscCall(PetscObjectReference((PetscObject)isrow));
1866   PetscCall(PetscObjectReference((PetscObject)iscol));
1867   b->icol = isicol;
1868 
1869   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1870   /* In b structure:  Free imax, ilen, old a, old j.
1871      Allocate bdiag, solve_work, new a, new j */
1872   b->maxnz = b->nz = bdiag[0] + 1;
1873 
1874   (fact)->info.factor_mallocs    = reallocs;
1875   (fact)->info.fill_ratio_given  = f;
1876   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1877   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1878   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1879   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 #if 0
1884 // unused
1885 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1886 {
1887   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1888   IS                 isicol;
1889   const PetscInt    *r, *ic;
1890   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1891   PetscInt          *bi, *cols, nnz, *cols_lvl;
1892   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1893   PetscInt           i, levels, diagonal_fill;
1894   PetscBool          col_identity, row_identity;
1895   PetscReal          f;
1896   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1897   PetscBT            lnkbt;
1898   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1899   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1900   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1901   PetscBool          missing;
1902 
1903   PetscFunctionBegin;
1904   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1905   PetscCall(MatMissingDiagonal(A, &missing, &i));
1906   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1907 
1908   f             = info->fill;
1909   levels        = (PetscInt)info->levels;
1910   diagonal_fill = (PetscInt)info->diagonal_fill;
1911 
1912   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1913 
1914   PetscCall(ISIdentity(isrow, &row_identity));
1915   PetscCall(ISIdentity(iscol, &col_identity));
1916   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1917     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1918 
1919     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1920     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1921     fact->factortype               = MAT_FACTOR_ILU;
1922     (fact)->info.factor_mallocs    = 0;
1923     (fact)->info.fill_ratio_given  = info->fill;
1924     (fact)->info.fill_ratio_needed = 1.0;
1925 
1926     b       = (Mat_SeqAIJ *)(fact)->data;
1927     b->row  = isrow;
1928     b->col  = iscol;
1929     b->icol = isicol;
1930     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1931     PetscCall(PetscObjectReference((PetscObject)isrow));
1932     PetscCall(PetscObjectReference((PetscObject)iscol));
1933     PetscFunctionReturn(PETSC_SUCCESS);
1934   }
1935 
1936   PetscCall(ISGetIndices(isrow, &r));
1937   PetscCall(ISGetIndices(isicol, &ic));
1938 
1939   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1940   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1941   PetscCall(PetscMalloc1(n + 1, &bdiag));
1942   bi[0] = bdiag[0] = 0;
1943 
1944   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1945 
1946   /* create a linked list for storing column indices of the active row */
1947   nlnk = n + 1;
1948   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1949 
1950   /* initial FreeSpace size is f*(ai[n]+1) */
1951   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1952   current_space = free_space;
1953   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1954   current_space_lvl = free_space_lvl;
1955 
1956   for (i = 0; i < n; i++) {
1957     nzi = 0;
1958     /* copy current row into linked list */
1959     nnz = ai[r[i] + 1] - ai[r[i]];
1960     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1961     cols   = aj + ai[r[i]];
1962     lnk[i] = -1; /* marker to indicate if diagonal exists */
1963     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1964     nzi += nlnk;
1965 
1966     /* make sure diagonal entry is included */
1967     if (diagonal_fill && lnk[i] == -1) {
1968       fm = n;
1969       while (lnk[fm] < i) fm = lnk[fm];
1970       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1971       lnk[fm]    = i;
1972       lnk_lvl[i] = 0;
1973       nzi++;
1974       dcount++;
1975     }
1976 
1977     /* add pivot rows into the active row */
1978     nzbd = 0;
1979     prow = lnk[n];
1980     while (prow < i) {
1981       nnz      = bdiag[prow];
1982       cols     = bj_ptr[prow] + nnz + 1;
1983       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1984       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1985       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1986       nzi += nlnk;
1987       prow = lnk[prow];
1988       nzbd++;
1989     }
1990     bdiag[i]  = nzbd;
1991     bi[i + 1] = bi[i] + nzi;
1992 
1993     /* if free space is not available, make more free space */
1994     if (current_space->local_remaining < nzi) {
1995       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1996       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1997       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1998       reallocs++;
1999     }
2000 
2001     /* copy data into free_space and free_space_lvl, then initialize lnk */
2002     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2003     bj_ptr[i]    = current_space->array;
2004     bjlvl_ptr[i] = current_space_lvl->array;
2005 
2006     /* make sure the active row i has diagonal entry */
2007     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
2008 
2009     current_space->array += nzi;
2010     current_space->local_used += nzi;
2011     current_space->local_remaining -= nzi;
2012     current_space_lvl->array += nzi;
2013     current_space_lvl->local_used += nzi;
2014     current_space_lvl->local_remaining -= nzi;
2015   }
2016 
2017   PetscCall(ISRestoreIndices(isrow, &r));
2018   PetscCall(ISRestoreIndices(isicol, &ic));
2019 
2020   /* destroy list of free space and other temporary arrays */
2021   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2022   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2023   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2024   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2025   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
2026 
2027   #if defined(PETSC_USE_INFO)
2028   {
2029     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2030     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2031     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2032     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2033     PetscCall(PetscInfo(A, "for best performance.\n"));
2034     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2035   }
2036   #endif
2037 
2038   /* put together the new matrix */
2039   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2040   b = (Mat_SeqAIJ *)(fact)->data;
2041   b->free_ij      = PETSC_TRUE;
2042   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2043   b->free_a       = PETSC_TRUE;
2044   b->j = bj;
2045   b->i = bi;
2046   for (i = 0; i < n; i++) bdiag[i] += bi[i];
2047   b->diag = bdiag;
2048   b->ilen = NULL;
2049   b->imax = NULL;
2050   b->row  = isrow;
2051   b->col  = iscol;
2052   PetscCall(PetscObjectReference((PetscObject)isrow));
2053   PetscCall(PetscObjectReference((PetscObject)iscol));
2054   b->icol = isicol;
2055   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2056   /* In b structure:  Free imax, ilen, old a, old j.
2057      Allocate bdiag, solve_work, new a, new j */
2058   b->maxnz = b->nz = bi[n];
2059 
2060   (fact)->info.factor_mallocs    = reallocs;
2061   (fact)->info.fill_ratio_given  = f;
2062   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2063   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2064   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2065   PetscFunctionReturn(PETSC_SUCCESS);
2066 }
2067 #endif
2068 
2069 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2070 {
2071   Mat             C  = B;
2072   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2073   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2074   IS              ip = b->row, iip = b->icol;
2075   const PetscInt *rip, *riip;
2076   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2077   PetscInt       *ai = a->i, *aj = a->j;
2078   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2079   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2080   PetscBool       perm_identity;
2081   FactorShiftCtx  sctx;
2082   PetscReal       rs;
2083   MatScalar       d, *v;
2084 
2085   PetscFunctionBegin;
2086   /* MatPivotSetUp(): initialize shift context sctx */
2087   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2088 
2089   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2090     sctx.shift_top = info->zeropivot;
2091     for (i = 0; i < mbs; i++) {
2092       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2093       d  = (aa)[a->diag[i]];
2094       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2095       v  = aa + ai[i];
2096       nz = ai[i + 1] - ai[i];
2097       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2098       if (rs > sctx.shift_top) sctx.shift_top = rs;
2099     }
2100     sctx.shift_top *= 1.1;
2101     sctx.nshift_max = 5;
2102     sctx.shift_lo   = 0.;
2103     sctx.shift_hi   = 1.;
2104   }
2105 
2106   PetscCall(ISGetIndices(ip, &rip));
2107   PetscCall(ISGetIndices(iip, &riip));
2108 
2109   /* allocate working arrays
2110      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2111      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2112   */
2113   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2114 
2115   do {
2116     sctx.newshift = PETSC_FALSE;
2117 
2118     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2119     if (mbs) il[0] = 0;
2120 
2121     for (k = 0; k < mbs; k++) {
2122       /* zero rtmp */
2123       nz    = bi[k + 1] - bi[k];
2124       bjtmp = bj + bi[k];
2125       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2126 
2127       /* load in initial unfactored row */
2128       bval = ba + bi[k];
2129       jmin = ai[rip[k]];
2130       jmax = ai[rip[k] + 1];
2131       for (j = jmin; j < jmax; j++) {
2132         col = riip[aj[j]];
2133         if (col >= k) { /* only take upper triangular entry */
2134           rtmp[col] = aa[j];
2135           *bval++   = 0.0; /* for in-place factorization */
2136         }
2137       }
2138       /* shift the diagonal of the matrix: ZeropivotApply() */
2139       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2140 
2141       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2142       dk = rtmp[k];
2143       i  = c2r[k]; /* first row to be added to k_th row  */
2144 
2145       while (i < k) {
2146         nexti = c2r[i]; /* next row to be added to k_th row */
2147 
2148         /* compute multiplier, update diag(k) and U(i,k) */
2149         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2150         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2151         dk += uikdi * ba[ili];           /* update diag[k] */
2152         ba[ili] = uikdi;                 /* -U(i,k) */
2153 
2154         /* add multiple of row i to k-th row */
2155         jmin = ili + 1;
2156         jmax = bi[i + 1];
2157         if (jmin < jmax) {
2158           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2159           /* update il and c2r for row i */
2160           il[i]  = jmin;
2161           j      = bj[jmin];
2162           c2r[i] = c2r[j];
2163           c2r[j] = i;
2164         }
2165         i = nexti;
2166       }
2167 
2168       /* copy data into U(k,:) */
2169       rs   = 0.0;
2170       jmin = bi[k];
2171       jmax = bi[k + 1] - 1;
2172       if (jmin < jmax) {
2173         for (j = jmin; j < jmax; j++) {
2174           col   = bj[j];
2175           ba[j] = rtmp[col];
2176           rs += PetscAbsScalar(ba[j]);
2177         }
2178         /* add the k-th row into il and c2r */
2179         il[k]  = jmin;
2180         i      = bj[jmin];
2181         c2r[k] = c2r[i];
2182         c2r[i] = k;
2183       }
2184 
2185       /* MatPivotCheck() */
2186       sctx.rs = rs;
2187       sctx.pv = dk;
2188       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2189       if (sctx.newshift) break;
2190       dk = sctx.pv;
2191 
2192       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2193     }
2194   } while (sctx.newshift);
2195 
2196   PetscCall(PetscFree3(rtmp, il, c2r));
2197   PetscCall(ISRestoreIndices(ip, &rip));
2198   PetscCall(ISRestoreIndices(iip, &riip));
2199 
2200   PetscCall(ISIdentity(ip, &perm_identity));
2201   if (perm_identity) {
2202     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2203     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2204     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2205     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2206   } else {
2207     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2208     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2209     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2210     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2211   }
2212 
2213   C->assembled    = PETSC_TRUE;
2214   C->preallocated = PETSC_TRUE;
2215 
2216   PetscCall(PetscLogFlops(C->rmap->n));
2217 
2218   /* MatPivotView() */
2219   if (sctx.nshift) {
2220     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2221       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2222     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2223       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2224     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2225       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2226     }
2227   }
2228   PetscFunctionReturn(PETSC_SUCCESS);
2229 }
2230 
2231 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2232 {
2233   Mat             C  = B;
2234   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2235   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2236   IS              ip = b->row, iip = b->icol;
2237   const PetscInt *rip, *riip;
2238   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2239   PetscInt       *ai = a->i, *aj = a->j;
2240   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2241   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2242   PetscBool       perm_identity;
2243   FactorShiftCtx  sctx;
2244   PetscReal       rs;
2245   MatScalar       d, *v;
2246 
2247   PetscFunctionBegin;
2248   /* MatPivotSetUp(): initialize shift context sctx */
2249   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2250 
2251   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2252     sctx.shift_top = info->zeropivot;
2253     for (i = 0; i < mbs; i++) {
2254       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2255       d  = (aa)[a->diag[i]];
2256       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2257       v  = aa + ai[i];
2258       nz = ai[i + 1] - ai[i];
2259       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2260       if (rs > sctx.shift_top) sctx.shift_top = rs;
2261     }
2262     sctx.shift_top *= 1.1;
2263     sctx.nshift_max = 5;
2264     sctx.shift_lo   = 0.;
2265     sctx.shift_hi   = 1.;
2266   }
2267 
2268   PetscCall(ISGetIndices(ip, &rip));
2269   PetscCall(ISGetIndices(iip, &riip));
2270 
2271   /* initialization */
2272   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2273 
2274   do {
2275     sctx.newshift = PETSC_FALSE;
2276 
2277     for (i = 0; i < mbs; i++) jl[i] = mbs;
2278     il[0] = 0;
2279 
2280     for (k = 0; k < mbs; k++) {
2281       /* zero rtmp */
2282       nz    = bi[k + 1] - bi[k];
2283       bjtmp = bj + bi[k];
2284       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2285 
2286       bval = ba + bi[k];
2287       /* initialize k-th row by the perm[k]-th row of A */
2288       jmin = ai[rip[k]];
2289       jmax = ai[rip[k] + 1];
2290       for (j = jmin; j < jmax; j++) {
2291         col = riip[aj[j]];
2292         if (col >= k) { /* only take upper triangular entry */
2293           rtmp[col] = aa[j];
2294           *bval++   = 0.0; /* for in-place factorization */
2295         }
2296       }
2297       /* shift the diagonal of the matrix */
2298       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2299 
2300       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2301       dk = rtmp[k];
2302       i  = jl[k]; /* first row to be added to k_th row  */
2303 
2304       while (i < k) {
2305         nexti = jl[i]; /* next row to be added to k_th row */
2306 
2307         /* compute multiplier, update diag(k) and U(i,k) */
2308         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2309         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2310         dk += uikdi * ba[ili];
2311         ba[ili] = uikdi; /* -U(i,k) */
2312 
2313         /* add multiple of row i to k-th row */
2314         jmin = ili + 1;
2315         jmax = bi[i + 1];
2316         if (jmin < jmax) {
2317           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2318           /* update il and jl for row i */
2319           il[i] = jmin;
2320           j     = bj[jmin];
2321           jl[i] = jl[j];
2322           jl[j] = i;
2323         }
2324         i = nexti;
2325       }
2326 
2327       /* shift the diagonals when zero pivot is detected */
2328       /* compute rs=sum of abs(off-diagonal) */
2329       rs   = 0.0;
2330       jmin = bi[k] + 1;
2331       nz   = bi[k + 1] - jmin;
2332       bcol = bj + jmin;
2333       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2334 
2335       sctx.rs = rs;
2336       sctx.pv = dk;
2337       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2338       if (sctx.newshift) break;
2339       dk = sctx.pv;
2340 
2341       /* copy data into U(k,:) */
2342       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2343       jmin      = bi[k] + 1;
2344       jmax      = bi[k + 1];
2345       if (jmin < jmax) {
2346         for (j = jmin; j < jmax; j++) {
2347           col   = bj[j];
2348           ba[j] = rtmp[col];
2349         }
2350         /* add the k-th row into il and jl */
2351         il[k] = jmin;
2352         i     = bj[jmin];
2353         jl[k] = jl[i];
2354         jl[i] = k;
2355       }
2356     }
2357   } while (sctx.newshift);
2358 
2359   PetscCall(PetscFree3(rtmp, il, jl));
2360   PetscCall(ISRestoreIndices(ip, &rip));
2361   PetscCall(ISRestoreIndices(iip, &riip));
2362 
2363   PetscCall(ISIdentity(ip, &perm_identity));
2364   if (perm_identity) {
2365     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369   } else {
2370     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2371     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2372     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2373     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2374   }
2375 
2376   C->assembled    = PETSC_TRUE;
2377   C->preallocated = PETSC_TRUE;
2378 
2379   PetscCall(PetscLogFlops(C->rmap->n));
2380   if (sctx.nshift) {
2381     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2382       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2383     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2384       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2385     }
2386   }
2387   PetscFunctionReturn(PETSC_SUCCESS);
2388 }
2389 
2390 /*
2391    icc() under revised new data structure.
2392    Factored arrays bj and ba are stored as
2393      U(0,:),...,U(i,:),U(n-1,:)
2394 
2395    ui=fact->i is an array of size n+1, in which
2396    ui+
2397      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2398      ui[n]:  points to U(n-1,n-1)+1
2399 
2400   udiag=fact->diag is an array of size n,in which
2401      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2402 
2403    U(i,:) contains udiag[i] as its last entry, i.e.,
2404     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2405 */
2406 
2407 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2408 {
2409   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2410   Mat_SeqSBAIJ      *b;
2411   PetscBool          perm_identity, missing;
2412   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2413   const PetscInt    *rip, *riip;
2414   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2415   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2416   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2417   PetscReal          fill = info->fill, levels = info->levels;
2418   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2419   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2420   PetscBT            lnkbt;
2421   IS                 iperm;
2422 
2423   PetscFunctionBegin;
2424   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2425   PetscCall(MatMissingDiagonal(A, &missing, &d));
2426   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2427   PetscCall(ISIdentity(perm, &perm_identity));
2428   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2429 
2430   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2431   PetscCall(PetscMalloc1(am + 1, &udiag));
2432   ui[0] = 0;
2433 
2434   /* ICC(0) without matrix ordering: simply rearrange column indices */
2435   if (!levels && perm_identity) {
2436     for (i = 0; i < am; i++) {
2437       ncols     = ai[i + 1] - a->diag[i];
2438       ui[i + 1] = ui[i] + ncols;
2439       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2440     }
2441     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2442     cols = uj;
2443     for (i = 0; i < am; i++) {
2444       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2445       ncols = ai[i + 1] - a->diag[i] - 1;
2446       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2447       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2448     }
2449   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2450     PetscCall(ISGetIndices(iperm, &riip));
2451     PetscCall(ISGetIndices(perm, &rip));
2452 
2453     /* initialization */
2454     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2455 
2456     /* jl: linked list for storing indices of the pivot rows
2457        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2458     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2459     for (i = 0; i < am; i++) {
2460       jl[i] = am;
2461       il[i] = 0;
2462     }
2463 
2464     /* create and initialize a linked list for storing column indices of the active row k */
2465     nlnk = am + 1;
2466     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2467 
2468     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2469     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2470     current_space = free_space;
2471     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2472     current_space_lvl = free_space_lvl;
2473 
2474     for (k = 0; k < am; k++) { /* for each active row k */
2475       /* initialize lnk by the column indices of row rip[k] of A */
2476       nzk   = 0;
2477       ncols = ai[rip[k] + 1] - ai[rip[k]];
2478       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2479       ncols_upper = 0;
2480       for (j = 0; j < ncols; j++) {
2481         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2482         if (riip[i] >= k) {         /* only take upper triangular entry */
2483           ajtmp[ncols_upper] = i;
2484           ncols_upper++;
2485         }
2486       }
2487       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2488       nzk += nlnk;
2489 
2490       /* update lnk by computing fill-in for each pivot row to be merged in */
2491       prow = jl[k]; /* 1st pivot row */
2492 
2493       while (prow < k) {
2494         nextprow = jl[prow];
2495 
2496         /* merge prow into k-th row */
2497         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2498         jmax  = ui[prow + 1];
2499         ncols = jmax - jmin;
2500         i     = jmin - ui[prow];
2501         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2502         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2503         j     = *(uj - 1);
2504         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2505         nzk += nlnk;
2506 
2507         /* update il and jl for prow */
2508         if (jmin < jmax) {
2509           il[prow] = jmin;
2510           j        = *cols;
2511           jl[prow] = jl[j];
2512           jl[j]    = prow;
2513         }
2514         prow = nextprow;
2515       }
2516 
2517       /* if free space is not available, make more free space */
2518       if (current_space->local_remaining < nzk) {
2519         i = am - k + 1;                                    /* num of unfactored rows */
2520         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2521         PetscCall(PetscFreeSpaceGet(i, &current_space));
2522         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2523         reallocs++;
2524       }
2525 
2526       /* copy data into free_space and free_space_lvl, then initialize lnk */
2527       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2528       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2529 
2530       /* add the k-th row into il and jl */
2531       if (nzk > 1) {
2532         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2533         jl[k] = jl[i];
2534         jl[i] = k;
2535         il[k] = ui[k] + 1;
2536       }
2537       uj_ptr[k]     = current_space->array;
2538       uj_lvl_ptr[k] = current_space_lvl->array;
2539 
2540       current_space->array += nzk;
2541       current_space->local_used += nzk;
2542       current_space->local_remaining -= nzk;
2543 
2544       current_space_lvl->array += nzk;
2545       current_space_lvl->local_used += nzk;
2546       current_space_lvl->local_remaining -= nzk;
2547 
2548       ui[k + 1] = ui[k] + nzk;
2549     }
2550 
2551     PetscCall(ISRestoreIndices(perm, &rip));
2552     PetscCall(ISRestoreIndices(iperm, &riip));
2553     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2554     PetscCall(PetscFree(ajtmp));
2555 
2556     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2557     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2558     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2559     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2560     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2561 
2562   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2563 
2564   /* put together the new matrix in MATSEQSBAIJ format */
2565   b          = (Mat_SeqSBAIJ *)(fact)->data;
2566   b->free_ij = PETSC_TRUE;
2567   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2568   b->free_a    = PETSC_TRUE;
2569   b->j         = uj;
2570   b->i         = ui;
2571   b->diag      = udiag;
2572   b->free_diag = PETSC_TRUE;
2573   b->ilen      = NULL;
2574   b->imax      = NULL;
2575   b->row       = perm;
2576   b->col       = perm;
2577   PetscCall(PetscObjectReference((PetscObject)perm));
2578   PetscCall(PetscObjectReference((PetscObject)perm));
2579   b->icol          = iperm;
2580   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2581 
2582   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2583 
2584   b->maxnz = b->nz = ui[am];
2585 
2586   fact->info.factor_mallocs   = reallocs;
2587   fact->info.fill_ratio_given = fill;
2588   if (ai[am] != 0) {
2589     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2590     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2591   } else {
2592     fact->info.fill_ratio_needed = 0.0;
2593   }
2594 #if defined(PETSC_USE_INFO)
2595   if (ai[am] != 0) {
2596     PetscReal af = fact->info.fill_ratio_needed;
2597     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2598     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2599     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2600   } else {
2601     PetscCall(PetscInfo(A, "Empty matrix\n"));
2602   }
2603 #endif
2604   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2605   PetscFunctionReturn(PETSC_SUCCESS);
2606 }
2607 
2608 #if 0
2609 // unused
2610 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2611 {
2612   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2613   Mat_SeqSBAIJ      *b;
2614   PetscBool          perm_identity, missing;
2615   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2616   const PetscInt    *rip, *riip;
2617   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2618   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2619   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2620   PetscReal          fill = info->fill, levels = info->levels;
2621   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2622   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2623   PetscBT            lnkbt;
2624   IS                 iperm;
2625 
2626   PetscFunctionBegin;
2627   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2628   PetscCall(MatMissingDiagonal(A, &missing, &d));
2629   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2630   PetscCall(ISIdentity(perm, &perm_identity));
2631   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2632 
2633   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
2634   PetscCall(PetscMalloc1(am + 1, &udiag));
2635   ui[0] = 0;
2636 
2637   /* ICC(0) without matrix ordering: simply copies fill pattern */
2638   if (!levels && perm_identity) {
2639     for (i = 0; i < am; i++) {
2640       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2641       udiag[i]  = ui[i];
2642     }
2643     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2644     cols = uj;
2645     for (i = 0; i < am; i++) {
2646       aj    = a->j + a->diag[i];
2647       ncols = ui[i + 1] - ui[i];
2648       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2649     }
2650   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2651     PetscCall(ISGetIndices(iperm, &riip));
2652     PetscCall(ISGetIndices(perm, &rip));
2653 
2654     /* initialization */
2655     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2656 
2657     /* jl: linked list for storing indices of the pivot rows
2658        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2659     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2660     for (i = 0; i < am; i++) {
2661       jl[i] = am;
2662       il[i] = 0;
2663     }
2664 
2665     /* create and initialize a linked list for storing column indices of the active row k */
2666     nlnk = am + 1;
2667     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2668 
2669     /* initial FreeSpace size is fill*(ai[am]+1) */
2670     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2671     current_space = free_space;
2672     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2673     current_space_lvl = free_space_lvl;
2674 
2675     for (k = 0; k < am; k++) { /* for each active row k */
2676       /* initialize lnk by the column indices of row rip[k] of A */
2677       nzk   = 0;
2678       ncols = ai[rip[k] + 1] - ai[rip[k]];
2679       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2680       ncols_upper = 0;
2681       for (j = 0; j < ncols; j++) {
2682         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2683         if (riip[i] >= k) {         /* only take upper triangular entry */
2684           ajtmp[ncols_upper] = i;
2685           ncols_upper++;
2686         }
2687       }
2688       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2689       nzk += nlnk;
2690 
2691       /* update lnk by computing fill-in for each pivot row to be merged in */
2692       prow = jl[k]; /* 1st pivot row */
2693 
2694       while (prow < k) {
2695         nextprow = jl[prow];
2696 
2697         /* merge prow into k-th row */
2698         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2699         jmax  = ui[prow + 1];
2700         ncols = jmax - jmin;
2701         i     = jmin - ui[prow];
2702         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2703         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2704         j     = *(uj - 1);
2705         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2706         nzk += nlnk;
2707 
2708         /* update il and jl for prow */
2709         if (jmin < jmax) {
2710           il[prow] = jmin;
2711           j        = *cols;
2712           jl[prow] = jl[j];
2713           jl[j]    = prow;
2714         }
2715         prow = nextprow;
2716       }
2717 
2718       /* if free space is not available, make more free space */
2719       if (current_space->local_remaining < nzk) {
2720         i = am - k + 1;                                    /* num of unfactored rows */
2721         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2722         PetscCall(PetscFreeSpaceGet(i, &current_space));
2723         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2724         reallocs++;
2725       }
2726 
2727       /* copy data into free_space and free_space_lvl, then initialize lnk */
2728       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2729       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2730 
2731       /* add the k-th row into il and jl */
2732       if (nzk > 1) {
2733         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2734         jl[k] = jl[i];
2735         jl[i] = k;
2736         il[k] = ui[k] + 1;
2737       }
2738       uj_ptr[k]     = current_space->array;
2739       uj_lvl_ptr[k] = current_space_lvl->array;
2740 
2741       current_space->array += nzk;
2742       current_space->local_used += nzk;
2743       current_space->local_remaining -= nzk;
2744 
2745       current_space_lvl->array += nzk;
2746       current_space_lvl->local_used += nzk;
2747       current_space_lvl->local_remaining -= nzk;
2748 
2749       ui[k + 1] = ui[k] + nzk;
2750     }
2751 
2752   #if defined(PETSC_USE_INFO)
2753     if (ai[am] != 0) {
2754       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2755       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2756       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2757       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2758     } else {
2759       PetscCall(PetscInfo(A, "Empty matrix\n"));
2760     }
2761   #endif
2762 
2763     PetscCall(ISRestoreIndices(perm, &rip));
2764     PetscCall(ISRestoreIndices(iperm, &riip));
2765     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2766     PetscCall(PetscFree(ajtmp));
2767 
2768     /* destroy list of free space and other temporary array(s) */
2769     PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
2770     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2771     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2772     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2773 
2774   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2775 
2776   /* put together the new matrix in MATSEQSBAIJ format */
2777 
2778   b               = (Mat_SeqSBAIJ *)fact->data;
2779   b->free_ij       = PETSC_TRUE;
2780   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
2781   b->free_a        = PETSC_TRUE;
2782   b->j         = uj;
2783   b->i         = ui;
2784   b->diag      = udiag;
2785   b->free_diag = PETSC_TRUE;
2786   b->ilen      = NULL;
2787   b->imax      = NULL;
2788   b->row       = perm;
2789   b->col       = perm;
2790 
2791   PetscCall(PetscObjectReference((PetscObject)perm));
2792   PetscCall(PetscObjectReference((PetscObject)perm));
2793 
2794   b->icol          = iperm;
2795   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2796   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2797   b->maxnz = b->nz = ui[am];
2798 
2799   fact->info.factor_mallocs   = reallocs;
2800   fact->info.fill_ratio_given = fill;
2801   if (ai[am] != 0) {
2802     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2803   } else {
2804     fact->info.fill_ratio_needed = 0.0;
2805   }
2806   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2807   PetscFunctionReturn(PETSC_SUCCESS);
2808 }
2809 #endif
2810 
2811 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2812 {
2813   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2814   Mat_SeqSBAIJ      *b;
2815   PetscBool          perm_identity, missing;
2816   PetscReal          fill = info->fill;
2817   const PetscInt    *rip, *riip;
2818   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2819   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2820   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2821   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2822   PetscBT            lnkbt;
2823   IS                 iperm;
2824 
2825   PetscFunctionBegin;
2826   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2827   PetscCall(MatMissingDiagonal(A, &missing, &i));
2828   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2829 
2830   /* check whether perm is the identity mapping */
2831   PetscCall(ISIdentity(perm, &perm_identity));
2832   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2833   PetscCall(ISGetIndices(iperm, &riip));
2834   PetscCall(ISGetIndices(perm, &rip));
2835 
2836   /* initialization */
2837   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2838   PetscCall(PetscMalloc1(am + 1, &udiag));
2839   ui[0] = 0;
2840 
2841   /* jl: linked list for storing indices of the pivot rows
2842      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2843   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2844   for (i = 0; i < am; i++) {
2845     jl[i] = am;
2846     il[i] = 0;
2847   }
2848 
2849   /* create and initialize a linked list for storing column indices of the active row k */
2850   nlnk = am + 1;
2851   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2852 
2853   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2854   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2855   current_space = free_space;
2856 
2857   for (k = 0; k < am; k++) { /* for each active row k */
2858     /* initialize lnk by the column indices of row rip[k] of A */
2859     nzk   = 0;
2860     ncols = ai[rip[k] + 1] - ai[rip[k]];
2861     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2862     ncols_upper = 0;
2863     for (j = 0; j < ncols; j++) {
2864       i = riip[*(aj + ai[rip[k]] + j)];
2865       if (i >= k) { /* only take upper triangular entry */
2866         cols[ncols_upper] = i;
2867         ncols_upper++;
2868       }
2869     }
2870     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2871     nzk += nlnk;
2872 
2873     /* update lnk by computing fill-in for each pivot row to be merged in */
2874     prow = jl[k]; /* 1st pivot row */
2875 
2876     while (prow < k) {
2877       nextprow = jl[prow];
2878       /* merge prow into k-th row */
2879       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2880       jmax   = ui[prow + 1];
2881       ncols  = jmax - jmin;
2882       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2883       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2884       nzk += nlnk;
2885 
2886       /* update il and jl for prow */
2887       if (jmin < jmax) {
2888         il[prow] = jmin;
2889         j        = *uj_ptr;
2890         jl[prow] = jl[j];
2891         jl[j]    = prow;
2892       }
2893       prow = nextprow;
2894     }
2895 
2896     /* if free space is not available, make more free space */
2897     if (current_space->local_remaining < nzk) {
2898       i = am - k + 1;                                    /* num of unfactored rows */
2899       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2900       PetscCall(PetscFreeSpaceGet(i, &current_space));
2901       reallocs++;
2902     }
2903 
2904     /* copy data into free space, then initialize lnk */
2905     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2906 
2907     /* add the k-th row into il and jl */
2908     if (nzk > 1) {
2909       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2910       jl[k] = jl[i];
2911       jl[i] = k;
2912       il[k] = ui[k] + 1;
2913     }
2914     ui_ptr[k] = current_space->array;
2915 
2916     current_space->array += nzk;
2917     current_space->local_used += nzk;
2918     current_space->local_remaining -= nzk;
2919 
2920     ui[k + 1] = ui[k] + nzk;
2921   }
2922 
2923   PetscCall(ISRestoreIndices(perm, &rip));
2924   PetscCall(ISRestoreIndices(iperm, &riip));
2925   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2926 
2927   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2928   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2929   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2930   PetscCall(PetscLLDestroy(lnk, lnkbt));
2931 
2932   /* put together the new matrix in MATSEQSBAIJ format */
2933   b          = (Mat_SeqSBAIJ *)fact->data;
2934   b->free_ij = PETSC_TRUE;
2935   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2936   b->free_a    = PETSC_TRUE;
2937   b->j         = uj;
2938   b->i         = ui;
2939   b->diag      = udiag;
2940   b->free_diag = PETSC_TRUE;
2941   b->ilen      = NULL;
2942   b->imax      = NULL;
2943   b->row       = perm;
2944   b->col       = perm;
2945 
2946   PetscCall(PetscObjectReference((PetscObject)perm));
2947   PetscCall(PetscObjectReference((PetscObject)perm));
2948 
2949   b->icol          = iperm;
2950   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2951 
2952   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2953 
2954   b->maxnz = b->nz = ui[am];
2955 
2956   fact->info.factor_mallocs   = reallocs;
2957   fact->info.fill_ratio_given = fill;
2958   if (ai[am] != 0) {
2959     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2960     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2961   } else {
2962     fact->info.fill_ratio_needed = 0.0;
2963   }
2964 #if defined(PETSC_USE_INFO)
2965   if (ai[am] != 0) {
2966     PetscReal af = fact->info.fill_ratio_needed;
2967     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2968     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2969     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2970   } else {
2971     PetscCall(PetscInfo(A, "Empty matrix\n"));
2972   }
2973 #endif
2974   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2975   PetscFunctionReturn(PETSC_SUCCESS);
2976 }
2977 
2978 #if 0
2979 // unused
2980 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2981 {
2982   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2983   Mat_SeqSBAIJ      *b;
2984   PetscBool          perm_identity, missing;
2985   PetscReal          fill = info->fill;
2986   const PetscInt    *rip, *riip;
2987   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2988   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2989   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2990   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2991   PetscBT            lnkbt;
2992   IS                 iperm;
2993 
2994   PetscFunctionBegin;
2995   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2996   PetscCall(MatMissingDiagonal(A, &missing, &i));
2997   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2998 
2999   /* check whether perm is the identity mapping */
3000   PetscCall(ISIdentity(perm, &perm_identity));
3001   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3002   PetscCall(ISGetIndices(iperm, &riip));
3003   PetscCall(ISGetIndices(perm, &rip));
3004 
3005   /* initialization */
3006   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
3007   ui[0] = 0;
3008 
3009   /* jl: linked list for storing indices of the pivot rows
3010      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3011   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3012   for (i = 0; i < am; i++) {
3013     jl[i] = am;
3014     il[i] = 0;
3015   }
3016 
3017   /* create and initialize a linked list for storing column indices of the active row k */
3018   nlnk = am + 1;
3019   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
3020 
3021   /* initial FreeSpace size is fill*(ai[am]+1) */
3022   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3023   current_space = free_space;
3024 
3025   for (k = 0; k < am; k++) { /* for each active row k */
3026     /* initialize lnk by the column indices of row rip[k] of A */
3027     nzk   = 0;
3028     ncols = ai[rip[k] + 1] - ai[rip[k]];
3029     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3030     ncols_upper = 0;
3031     for (j = 0; j < ncols; j++) {
3032       i = riip[*(aj + ai[rip[k]] + j)];
3033       if (i >= k) { /* only take upper triangular entry */
3034         cols[ncols_upper] = i;
3035         ncols_upper++;
3036       }
3037     }
3038     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3039     nzk += nlnk;
3040 
3041     /* update lnk by computing fill-in for each pivot row to be merged in */
3042     prow = jl[k]; /* 1st pivot row */
3043 
3044     while (prow < k) {
3045       nextprow = jl[prow];
3046       /* merge prow into k-th row */
3047       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3048       jmax   = ui[prow + 1];
3049       ncols  = jmax - jmin;
3050       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3051       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3052       nzk += nlnk;
3053 
3054       /* update il and jl for prow */
3055       if (jmin < jmax) {
3056         il[prow] = jmin;
3057         j        = *uj_ptr;
3058         jl[prow] = jl[j];
3059         jl[j]    = prow;
3060       }
3061       prow = nextprow;
3062     }
3063 
3064     /* if free space is not available, make more free space */
3065     if (current_space->local_remaining < nzk) {
3066       i = am - k + 1;                     /* num of unfactored rows */
3067       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3068       PetscCall(PetscFreeSpaceGet(i, &current_space));
3069       reallocs++;
3070     }
3071 
3072     /* copy data into free space, then initialize lnk */
3073     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3074 
3075     /* add the k-th row into il and jl */
3076     if (nzk - 1 > 0) {
3077       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3078       jl[k] = jl[i];
3079       jl[i] = k;
3080       il[k] = ui[k] + 1;
3081     }
3082     ui_ptr[k] = current_space->array;
3083 
3084     current_space->array += nzk;
3085     current_space->local_used += nzk;
3086     current_space->local_remaining -= nzk;
3087 
3088     ui[k + 1] = ui[k] + nzk;
3089   }
3090 
3091   #if defined(PETSC_USE_INFO)
3092   if (ai[am] != 0) {
3093     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3094     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3095     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3096     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3097   } else {
3098     PetscCall(PetscInfo(A, "Empty matrix\n"));
3099   }
3100   #endif
3101 
3102   PetscCall(ISRestoreIndices(perm, &rip));
3103   PetscCall(ISRestoreIndices(iperm, &riip));
3104   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3105 
3106   /* destroy list of free space and other temporary array(s) */
3107   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
3108   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3109   PetscCall(PetscLLDestroy(lnk, lnkbt));
3110 
3111   /* put together the new matrix in MATSEQSBAIJ format */
3112   b               = (Mat_SeqSBAIJ *)fact->data;
3113   b->free_ij      = PETSC_TRUE;
3114   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
3115   b->free_a       = PETSC_TRUE;
3116   b->j    = uj;
3117   b->i    = ui;
3118   b->diag = NULL;
3119   b->ilen = NULL;
3120   b->imax = NULL;
3121   b->row  = perm;
3122   b->col  = perm;
3123 
3124   PetscCall(PetscObjectReference((PetscObject)perm));
3125   PetscCall(PetscObjectReference((PetscObject)perm));
3126 
3127   b->icol          = iperm;
3128   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3129 
3130   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3131   b->maxnz = b->nz = ui[am];
3132 
3133   fact->info.factor_mallocs   = reallocs;
3134   fact->info.fill_ratio_given = fill;
3135   if (ai[am] != 0) {
3136     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3137   } else {
3138     fact->info.fill_ratio_needed = 0.0;
3139   }
3140   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3141   PetscFunctionReturn(PETSC_SUCCESS);
3142 }
3143 #endif
3144 
3145 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3146 {
3147   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3148   PetscInt           n  = A->rmap->n;
3149   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3150   PetscScalar       *x, sum;
3151   const PetscScalar *b;
3152   const MatScalar   *aa = a->a, *v;
3153   PetscInt           i, nz;
3154 
3155   PetscFunctionBegin;
3156   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3157 
3158   PetscCall(VecGetArrayRead(bb, &b));
3159   PetscCall(VecGetArrayWrite(xx, &x));
3160 
3161   /* forward solve the lower triangular */
3162   x[0] = b[0];
3163   v    = aa;
3164   vi   = aj;
3165   for (i = 1; i < n; i++) {
3166     nz  = ai[i + 1] - ai[i];
3167     sum = b[i];
3168     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3169     v += nz;
3170     vi += nz;
3171     x[i] = sum;
3172   }
3173 
3174   /* backward solve the upper triangular */
3175   for (i = n - 1; i >= 0; i--) {
3176     v   = aa + adiag[i + 1] + 1;
3177     vi  = aj + adiag[i + 1] + 1;
3178     nz  = adiag[i] - adiag[i + 1] - 1;
3179     sum = x[i];
3180     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3181     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3182   }
3183 
3184   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3185   PetscCall(VecRestoreArrayRead(bb, &b));
3186   PetscCall(VecRestoreArrayWrite(xx, &x));
3187   PetscFunctionReturn(PETSC_SUCCESS);
3188 }
3189 
3190 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3191 {
3192   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3193   IS                 iscol = a->col, isrow = a->row;
3194   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3195   const PetscInt    *rout, *cout, *r, *c;
3196   PetscScalar       *x, *tmp, sum;
3197   const PetscScalar *b;
3198   const MatScalar   *aa = a->a, *v;
3199 
3200   PetscFunctionBegin;
3201   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3202 
3203   PetscCall(VecGetArrayRead(bb, &b));
3204   PetscCall(VecGetArrayWrite(xx, &x));
3205   tmp = a->solve_work;
3206 
3207   PetscCall(ISGetIndices(isrow, &rout));
3208   r = rout;
3209   PetscCall(ISGetIndices(iscol, &cout));
3210   c = cout;
3211 
3212   /* forward solve the lower triangular */
3213   tmp[0] = b[r[0]];
3214   v      = aa;
3215   vi     = aj;
3216   for (i = 1; i < n; i++) {
3217     nz  = ai[i + 1] - ai[i];
3218     sum = b[r[i]];
3219     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3220     tmp[i] = sum;
3221     v += nz;
3222     vi += nz;
3223   }
3224 
3225   /* backward solve the upper triangular */
3226   for (i = n - 1; i >= 0; i--) {
3227     v   = aa + adiag[i + 1] + 1;
3228     vi  = aj + adiag[i + 1] + 1;
3229     nz  = adiag[i] - adiag[i + 1] - 1;
3230     sum = tmp[i];
3231     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3232     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3233   }
3234 
3235   PetscCall(ISRestoreIndices(isrow, &rout));
3236   PetscCall(ISRestoreIndices(iscol, &cout));
3237   PetscCall(VecRestoreArrayRead(bb, &b));
3238   PetscCall(VecRestoreArrayWrite(xx, &x));
3239   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3240   PetscFunctionReturn(PETSC_SUCCESS);
3241 }
3242 
3243 #if 0
3244 // unused
3245 /*
3246     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3247 */
3248 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3249 {
3250   Mat             B = *fact;
3251   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3252   IS              isicol;
3253   const PetscInt *r, *ic;
3254   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3255   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3256   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3257   PetscInt        nlnk, *lnk;
3258   PetscBT         lnkbt;
3259   PetscBool       row_identity, icol_identity;
3260   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3261   const PetscInt *ics;
3262   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3263   PetscReal       dt = info->dt, shift = info->shiftamount;
3264   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3265   PetscBool       missing;
3266 
3267   PetscFunctionBegin;
3268   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3269   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3270 
3271   /* symbolic factorization, can be reused */
3272   PetscCall(MatMissingDiagonal(A, &missing, &i));
3273   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3274   adiag = a->diag;
3275 
3276   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3277 
3278   /* bdiag is location of diagonal in factor */
3279   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3280   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3281 
3282   /* allocate row pointers bi */
3283   PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));
3284 
3285   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3286   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3287   nnz_max = ai[n] + 2 * n * dtcount + 2;
3288 
3289   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
3290   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));
3291 
3292   /* put together the new matrix */
3293   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3294   b = (Mat_SeqAIJ *)B->data;
3295   b->free_a       = PETSC_TRUE;
3296   b->free_ij      = PETSC_TRUE;
3297   b->a    = ba;
3298   b->j    = bj;
3299   b->i    = bi;
3300   b->diag = bdiag;
3301   b->ilen = NULL;
3302   b->imax = NULL;
3303   b->row  = isrow;
3304   b->col  = iscol;
3305   PetscCall(PetscObjectReference((PetscObject)isrow));
3306   PetscCall(PetscObjectReference((PetscObject)iscol));
3307   b->icol = isicol;
3308 
3309   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3310   b->maxnz = nnz_max;
3311 
3312   B->factortype            = MAT_FACTOR_ILUDT;
3313   B->info.factor_mallocs   = 0;
3314   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3315   /*  end of symbolic factorization */
3316 
3317   PetscCall(ISGetIndices(isrow, &r));
3318   PetscCall(ISGetIndices(isicol, &ic));
3319   ics = ic;
3320 
3321   /* linked list for storing column indices of the active row */
3322   nlnk = n + 1;
3323   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3324 
3325   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3326   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3327   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3328   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3329   PetscCall(PetscArrayzero(rtmp, n));
3330 
3331   bi[0]         = 0;
3332   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3333   bdiag_rev[n]  = bdiag[0];
3334   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3335   for (i = 0; i < n; i++) {
3336     /* copy initial fill into linked list */
3337     nzi = ai[r[i] + 1] - ai[r[i]];
3338     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3339     nzi_al = adiag[r[i]] - ai[r[i]];
3340     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3341     ajtmp  = aj + ai[r[i]];
3342     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3343 
3344     /* load in initial (unfactored row) */
3345     aatmp = a->a + ai[r[i]];
3346     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3347 
3348     /* add pivot rows into linked list */
3349     row = lnk[n];
3350     while (row < i) {
3351       nzi_bl = bi[row + 1] - bi[row] + 1;
3352       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3353       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3354       nzi += nlnk;
3355       row = lnk[row];
3356     }
3357 
3358     /* copy data from lnk into jtmp, then initialize lnk */
3359     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3360 
3361     /* numerical factorization */
3362     bjtmp = jtmp;
3363     row   = *bjtmp++; /* 1st pivot row */
3364     while (row < i) {
3365       pc         = rtmp + row;
3366       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3367       multiplier = (*pc) * (*pv);
3368       *pc        = multiplier;
3369       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3370         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3371         pv = ba + bdiag[row + 1] + 1;
3372         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3373         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3374         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3375       }
3376       row = *bjtmp++;
3377     }
3378 
3379     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3380     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3381     nzi_bl   = 0;
3382     j        = 0;
3383     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3384       vtmp[j]       = rtmp[jtmp[j]];
3385       rtmp[jtmp[j]] = 0.0;
3386       nzi_bl++;
3387       j++;
3388     }
3389     nzi_bu = nzi - nzi_bl - 1;
3390     while (j < nzi) {
3391       vtmp[j]       = rtmp[jtmp[j]];
3392       rtmp[jtmp[j]] = 0.0;
3393       j++;
3394     }
3395 
3396     bjtmp = bj + bi[i];
3397     batmp = ba + bi[i];
3398     /* apply level dropping rule to L part */
3399     ncut = nzi_al + dtcount;
3400     if (ncut < nzi_bl) {
3401       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3402       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3403     } else {
3404       ncut = nzi_bl;
3405     }
3406     for (j = 0; j < ncut; j++) {
3407       bjtmp[j] = jtmp[j];
3408       batmp[j] = vtmp[j];
3409     }
3410     bi[i + 1] = bi[i] + ncut;
3411     nzi       = ncut + 1;
3412 
3413     /* apply level dropping rule to U part */
3414     ncut = nzi_au + dtcount;
3415     if (ncut < nzi_bu) {
3416       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3417       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3418     } else {
3419       ncut = nzi_bu;
3420     }
3421     nzi += ncut;
3422 
3423     /* mark bdiagonal */
3424     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3425     bdiag_rev[n - i - 1] = bdiag[i + 1];
3426     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3427     bjtmp                = bj + bdiag[i];
3428     batmp                = ba + bdiag[i];
3429     *bjtmp               = i;
3430     *batmp               = diag_tmp; /* rtmp[i]; */
3431     if (*batmp == 0.0) *batmp = dt + shift;
3432     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3433 
3434     bjtmp = bj + bdiag[i + 1] + 1;
3435     batmp = ba + bdiag[i + 1] + 1;
3436     for (k = 0; k < ncut; k++) {
3437       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3438       batmp[k] = vtmp[nzi_bl + 1 + k];
3439     }
3440 
3441     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3442   }              /* for (i=0; i<n; i++) */
3443   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3444 
3445   PetscCall(ISRestoreIndices(isrow, &r));
3446   PetscCall(ISRestoreIndices(isicol, &ic));
3447 
3448   PetscCall(PetscLLDestroy(lnk, lnkbt));
3449   PetscCall(PetscFree2(im, jtmp));
3450   PetscCall(PetscFree2(rtmp, vtmp));
3451   PetscCall(PetscFree(bdiag_rev));
3452 
3453   PetscCall(PetscLogFlops(B->cmap->n));
3454   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3455 
3456   PetscCall(ISIdentity(isrow, &row_identity));
3457   PetscCall(ISIdentity(isicol, &icol_identity));
3458   if (row_identity && icol_identity) {
3459     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3460   } else {
3461     B->ops->solve = MatSolve_SeqAIJ;
3462   }
3463 
3464   B->ops->solveadd          = NULL;
3465   B->ops->solvetranspose    = NULL;
3466   B->ops->solvetransposeadd = NULL;
3467   B->ops->matsolve          = NULL;
3468   B->assembled              = PETSC_TRUE;
3469   B->preallocated           = PETSC_TRUE;
3470   PetscFunctionReturn(PETSC_SUCCESS);
3471 }
3472 
3473 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3474 /*
3475     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3476 */
3477 
3478 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3479 {
3480   PetscFunctionBegin;
3481   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3482   PetscFunctionReturn(PETSC_SUCCESS);
3483 }
3484 
3485 /*
3486    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3487    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3488 */
3489 /*
3490     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3491 */
3492 
3493 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3494 {
3495   Mat             C = fact;
3496   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3497   IS              isrow = b->row, isicol = b->icol;
3498   const PetscInt *r, *ic, *ics;
3499   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3500   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3501   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3502   PetscReal       dt = info->dt, shift = info->shiftamount;
3503   PetscBool       row_identity, col_identity;
3504 
3505   PetscFunctionBegin;
3506   PetscCall(ISGetIndices(isrow, &r));
3507   PetscCall(ISGetIndices(isicol, &ic));
3508   PetscCall(PetscMalloc1(n + 1, &rtmp));
3509   ics = ic;
3510 
3511   for (i = 0; i < n; i++) {
3512     /* initialize rtmp array */
3513     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3514     bjtmp = bj + bi[i];
3515     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3516     rtmp[i] = 0.0;
3517     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3518     bjtmp   = bj + bdiag[i + 1] + 1;
3519     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3520 
3521     /* load in initial unfactored row of A */
3522     nz    = ai[r[i] + 1] - ai[r[i]];
3523     ajtmp = aj + ai[r[i]];
3524     v     = aa + ai[r[i]];
3525     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3526 
3527     /* numerical factorization */
3528     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3529     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3530     k     = 0;
3531     while (k < nzl) {
3532       row        = *bjtmp++;
3533       pc         = rtmp + row;
3534       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3535       multiplier = (*pc) * (*pv);
3536       *pc        = multiplier;
3537       if (PetscAbsScalar(multiplier) > dt) {
3538         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3539         pv = b->a + bdiag[row + 1] + 1;
3540         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3541         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3542         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3543       }
3544       k++;
3545     }
3546 
3547     /* finished row so stick it into b->a */
3548     /* L-part */
3549     pv  = b->a + bi[i];
3550     pj  = bj + bi[i];
3551     nzl = bi[i + 1] - bi[i];
3552     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3553 
3554     /* diagonal: invert diagonal entries for simpler triangular solves */
3555     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3556     b->a[bdiag[i]] = 1.0 / rtmp[i];
3557 
3558     /* U-part */
3559     pv  = b->a + bdiag[i + 1] + 1;
3560     pj  = bj + bdiag[i + 1] + 1;
3561     nzu = bdiag[i] - bdiag[i + 1] - 1;
3562     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3563   }
3564 
3565   PetscCall(PetscFree(rtmp));
3566   PetscCall(ISRestoreIndices(isicol, &ic));
3567   PetscCall(ISRestoreIndices(isrow, &r));
3568 
3569   PetscCall(ISIdentity(isrow, &row_identity));
3570   PetscCall(ISIdentity(isicol, &col_identity));
3571   if (row_identity && col_identity) {
3572     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3573   } else {
3574     C->ops->solve = MatSolve_SeqAIJ;
3575   }
3576   C->ops->solveadd          = NULL;
3577   C->ops->solvetranspose    = NULL;
3578   C->ops->solvetransposeadd = NULL;
3579   C->ops->matsolve          = NULL;
3580   C->assembled              = PETSC_TRUE;
3581   C->preallocated           = PETSC_TRUE;
3582 
3583   PetscCall(PetscLogFlops(C->cmap->n));
3584   PetscFunctionReturn(PETSC_SUCCESS);
3585 }
3586 #endif
3587