xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision b6e6beb40ac0a73ccc19b70153df96d5058dce43)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal          best, past = 0, future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i = 0; i < n; i++) {
25     future = 0.0;
26     for (j = ai[i]; j < ai[i + 1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else past = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past / future > best) {
32       best    = past / future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n, &done));
38   PetscCall(PetscArrayzero(done, n));
39   PetscCall(PetscMalloc1(n, &order));
40   order[0] = current;
41   for (i = 0; i < n - 1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j = ai[current]; j < ai[current + 1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k = ai[jj]; k < ai[jj + 1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past / future > best) {
57         best       = past / future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k = 0; k < n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j = ai[k]; j < ai[k + 1]; j++) {
68           kk = aj[j];
69           if (done[kk]) past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past / future > best) {
74           best       = past / future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80     current      = newcurrent;
81     order[i + 1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
92 {
93   PetscFunctionBegin;
94   *type = MATSOLVERPETSC;
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
98 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
99 {
100   PetscInt n = A->rmap->n;
101 
102   PetscFunctionBegin;
103 #if defined(PETSC_USE_COMPLEX)
104   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
105 #endif
106   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
107   PetscCall(MatSetSizes(*B, n, n, n, n));
108   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
109     PetscCall(MatSetType(*B, MATSEQAIJ));
110 
111     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
112     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
113 
114     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
115     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
116     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
117     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
118   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
119     PetscCall(MatSetType(*B, MATSEQSBAIJ));
120     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
121 
122     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
123     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
124     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
125     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
126   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
127   (*B)->factortype = ftype;
128 
129   PetscCall(PetscFree((*B)->solvertype));
130   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
131   (*B)->canuseordering = PETSC_TRUE;
132   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
133   PetscFunctionReturn(PETSC_SUCCESS);
134 }
135 
136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
137 {
138   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
139   IS                 isicol;
140   const PetscInt    *r, *ic;
141   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
142   PetscInt          *bi, *bj, *ajtmp;
143   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
144   PetscReal          f;
145   PetscInt           nlnk, *lnk, k, **bi_ptr;
146   PetscFreeSpaceList free_space = NULL, current_space = NULL;
147   PetscBT            lnkbt;
148   PetscBool          missing;
149 
150   PetscFunctionBegin;
151   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
152   PetscCall(MatMissingDiagonal(A, &missing, &i));
153   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
154 
155   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
156   PetscCall(ISGetIndices(isrow, &r));
157   PetscCall(ISGetIndices(isicol, &ic));
158 
159   /* get new row pointers */
160   PetscCall(PetscMalloc1(n + 1, &bi));
161   bi[0] = 0;
162 
163   /* bdiag is location of diagonal in factor */
164   PetscCall(PetscMalloc1(n + 1, &bdiag));
165   bdiag[0] = 0;
166 
167   /* linked list for storing column indices of the active row */
168   nlnk = n + 1;
169   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
170 
171   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
172 
173   /* initial FreeSpace size is f*(ai[n]+1) */
174   f = info->fill;
175   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
176   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
177   current_space = free_space;
178 
179   for (i = 0; i < n; i++) {
180     /* copy previous fill into linked list */
181     nzi   = 0;
182     nnz   = ai[r[i] + 1] - ai[r[i]];
183     ajtmp = aj + ai[r[i]];
184     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
185     nzi += nlnk;
186 
187     /* add pivot rows into linked list */
188     row = lnk[n];
189     while (row < i) {
190       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
191       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
192       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
193       nzi += nlnk;
194       row = lnk[row];
195     }
196     bi[i + 1] = bi[i] + nzi;
197     im[i]     = nzi;
198 
199     /* mark bdiag */
200     nzbd = 0;
201     nnz  = nzi;
202     k    = lnk[n];
203     while (nnz-- && k < i) {
204       nzbd++;
205       k = lnk[k];
206     }
207     bdiag[i] = bi[i] + nzbd;
208 
209     /* if free space is not available, make more free space */
210     if (current_space->local_remaining < nzi) {
211       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
212       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
213       reallocs++;
214     }
215 
216     /* copy data into free space, then initialize lnk */
217     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
218 
219     bi_ptr[i] = current_space->array;
220     current_space->array += nzi;
221     current_space->local_used += nzi;
222     current_space->local_remaining -= nzi;
223   }
224 #if defined(PETSC_USE_INFO)
225   if (ai[n] != 0) {
226     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
227     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
228     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
229     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
230     PetscCall(PetscInfo(A, "for best performance.\n"));
231   } else {
232     PetscCall(PetscInfo(A, "Empty matrix\n"));
233   }
234 #endif
235 
236   PetscCall(ISRestoreIndices(isrow, &r));
237   PetscCall(ISRestoreIndices(isicol, &ic));
238 
239   /* destroy list of free space and other temporary array(s) */
240   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
241   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
242   PetscCall(PetscLLDestroy(lnk, lnkbt));
243   PetscCall(PetscFree2(bi_ptr, im));
244 
245   /* put together the new matrix */
246   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
247   b = (Mat_SeqAIJ *)(B)->data;
248 
249   b->free_a       = PETSC_TRUE;
250   b->free_ij      = PETSC_TRUE;
251   b->singlemalloc = PETSC_FALSE;
252 
253   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
254   b->j    = bj;
255   b->i    = bi;
256   b->diag = bdiag;
257   b->ilen = NULL;
258   b->imax = NULL;
259   b->row  = isrow;
260   b->col  = iscol;
261   PetscCall(PetscObjectReference((PetscObject)isrow));
262   PetscCall(PetscObjectReference((PetscObject)iscol));
263   b->icol = isicol;
264   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
265 
266   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
267   b->maxnz = b->nz = bi[n];
268 
269   (B)->factortype            = MAT_FACTOR_LU;
270   (B)->info.factor_mallocs   = reallocs;
271   (B)->info.fill_ratio_given = f;
272 
273   if (ai[n]) {
274     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
275   } else {
276     (B)->info.fill_ratio_needed = 0.0;
277   }
278   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
279   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
283 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
284 {
285   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
286   IS                 isicol;
287   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
288   PetscInt           i, n = A->rmap->n;
289   PetscInt          *bi, *bj;
290   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
291   PetscReal          f;
292   PetscInt           nlnk, *lnk, k, **bi_ptr;
293   PetscFreeSpaceList free_space = NULL, current_space = NULL;
294   PetscBT            lnkbt;
295   PetscBool          missing;
296 
297   PetscFunctionBegin;
298   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
299   PetscCall(MatMissingDiagonal(A, &missing, &i));
300   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
301 
302   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
303   PetscCall(ISGetIndices(isrow, &r));
304   PetscCall(ISGetIndices(isicol, &ic));
305 
306   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
307   PetscCall(PetscMalloc1(n + 1, &bi));
308   PetscCall(PetscMalloc1(n + 1, &bdiag));
309   bi[0] = bdiag[0] = 0;
310 
311   /* linked list for storing column indices of the active row */
312   nlnk = n + 1;
313   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
314 
315   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
316 
317   /* initial FreeSpace size is f*(ai[n]+1) */
318   f = info->fill;
319   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
320   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
321   current_space = free_space;
322 
323   for (i = 0; i < n; i++) {
324     /* copy previous fill into linked list */
325     nzi   = 0;
326     nnz   = ai[r[i] + 1] - ai[r[i]];
327     ajtmp = aj + ai[r[i]];
328     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
329     nzi += nlnk;
330 
331     /* add pivot rows into linked list */
332     row = lnk[n];
333     while (row < i) {
334       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
335       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
336       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
337       nzi += nlnk;
338       row = lnk[row];
339     }
340     bi[i + 1] = bi[i] + nzi;
341     im[i]     = nzi;
342 
343     /* mark bdiag */
344     nzbd = 0;
345     nnz  = nzi;
346     k    = lnk[n];
347     while (nnz-- && k < i) {
348       nzbd++;
349       k = lnk[k];
350     }
351     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
352 
353     /* if free space is not available, make more free space */
354     if (current_space->local_remaining < nzi) {
355       /* estimated additional space needed */
356       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
357       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
358       reallocs++;
359     }
360 
361     /* copy data into free space, then initialize lnk */
362     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
363 
364     bi_ptr[i] = current_space->array;
365     current_space->array += nzi;
366     current_space->local_used += nzi;
367     current_space->local_remaining -= nzi;
368   }
369 
370   PetscCall(ISRestoreIndices(isrow, &r));
371   PetscCall(ISRestoreIndices(isicol, &ic));
372 
373   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
374   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
375   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
376   PetscCall(PetscLLDestroy(lnk, lnkbt));
377   PetscCall(PetscFree2(bi_ptr, im));
378 
379   /* put together the new matrix */
380   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
381   b = (Mat_SeqAIJ *)(B)->data;
382 
383   b->free_a       = PETSC_TRUE;
384   b->free_ij      = PETSC_TRUE;
385   b->singlemalloc = PETSC_FALSE;
386 
387   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
388 
389   b->j    = bj;
390   b->i    = bi;
391   b->diag = bdiag;
392   b->ilen = NULL;
393   b->imax = NULL;
394   b->row  = isrow;
395   b->col  = iscol;
396   PetscCall(PetscObjectReference((PetscObject)isrow));
397   PetscCall(PetscObjectReference((PetscObject)iscol));
398   b->icol = isicol;
399   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
400 
401   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
402   b->maxnz = b->nz = bdiag[0] + 1;
403 
404   B->factortype            = MAT_FACTOR_LU;
405   B->info.factor_mallocs   = reallocs;
406   B->info.fill_ratio_given = f;
407 
408   if (ai[n]) {
409     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
410   } else {
411     B->info.fill_ratio_needed = 0.0;
412   }
413 #if defined(PETSC_USE_INFO)
414   if (ai[n] != 0) {
415     PetscReal af = B->info.fill_ratio_needed;
416     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
417     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
418     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
419     PetscCall(PetscInfo(A, "for best performance.\n"));
420   } else {
421     PetscCall(PetscInfo(A, "Empty matrix\n"));
422   }
423 #endif
424   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
425   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
426   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 /*
431     Trouble in factorization, should we dump the original matrix?
432 */
433 PetscErrorCode MatFactorDumpMatrix(Mat A)
434 {
435   PetscBool flg = PETSC_FALSE;
436 
437   PetscFunctionBegin;
438   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
439   if (flg) {
440     PetscViewer viewer;
441     char        filename[PETSC_MAX_PATH_LEN];
442 
443     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
444     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
445     PetscCall(MatView(A, viewer));
446     PetscCall(PetscViewerDestroy(&viewer));
447   }
448   PetscFunctionReturn(PETSC_SUCCESS);
449 }
450 
451 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
452 {
453   Mat              C = B;
454   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
455   IS               isrow = b->row, isicol = b->icol;
456   const PetscInt  *r, *ic, *ics;
457   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
458   PetscInt         i, j, k, nz, nzL, row, *pj;
459   const PetscInt  *ajtmp, *bjtmp;
460   MatScalar       *rtmp, *pc, multiplier, *pv;
461   const MatScalar *aa = a->a, *v;
462   PetscBool        row_identity, col_identity;
463   FactorShiftCtx   sctx;
464   const PetscInt  *ddiag;
465   PetscReal        rs;
466   MatScalar        d;
467 
468   PetscFunctionBegin;
469   /* MatPivotSetUp(): initialize shift context sctx */
470   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
471 
472   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
473     ddiag          = a->diag;
474     sctx.shift_top = info->zeropivot;
475     for (i = 0; i < n; i++) {
476       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
477       d  = (aa)[ddiag[i]];
478       rs = -PetscAbsScalar(d) - PetscRealPart(d);
479       v  = aa + ai[i];
480       nz = ai[i + 1] - ai[i];
481       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
482       if (rs > sctx.shift_top) sctx.shift_top = rs;
483     }
484     sctx.shift_top *= 1.1;
485     sctx.nshift_max = 5;
486     sctx.shift_lo   = 0.;
487     sctx.shift_hi   = 1.;
488   }
489 
490   PetscCall(ISGetIndices(isrow, &r));
491   PetscCall(ISGetIndices(isicol, &ic));
492   PetscCall(PetscMalloc1(n + 1, &rtmp));
493   ics = ic;
494 
495   do {
496     sctx.newshift = PETSC_FALSE;
497     for (i = 0; i < n; i++) {
498       /* zero rtmp */
499       /* L part */
500       nz    = bi[i + 1] - bi[i];
501       bjtmp = bj + bi[i];
502       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
503 
504       /* U part */
505       nz    = bdiag[i] - bdiag[i + 1];
506       bjtmp = bj + bdiag[i + 1] + 1;
507       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
508 
509       /* load in initial (unfactored row) */
510       nz    = ai[r[i] + 1] - ai[r[i]];
511       ajtmp = aj + ai[r[i]];
512       v     = aa + ai[r[i]];
513       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
514       /* ZeropivotApply() */
515       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
516 
517       /* elimination */
518       bjtmp = bj + bi[i];
519       row   = *bjtmp++;
520       nzL   = bi[i + 1] - bi[i];
521       for (k = 0; k < nzL; k++) {
522         pc = rtmp + row;
523         if (*pc != 0.0) {
524           pv         = b->a + bdiag[row];
525           multiplier = *pc * (*pv);
526           *pc        = multiplier;
527 
528           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
529           pv = b->a + bdiag[row + 1] + 1;
530           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
531 
532           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
533           PetscCall(PetscLogFlops(1 + 2.0 * nz));
534         }
535         row = *bjtmp++;
536       }
537 
538       /* finished row so stick it into b->a */
539       rs = 0.0;
540       /* L part */
541       pv = b->a + bi[i];
542       pj = b->j + bi[i];
543       nz = bi[i + 1] - bi[i];
544       for (j = 0; j < nz; j++) {
545         pv[j] = rtmp[pj[j]];
546         rs += PetscAbsScalar(pv[j]);
547       }
548 
549       /* U part */
550       pv = b->a + bdiag[i + 1] + 1;
551       pj = b->j + bdiag[i + 1] + 1;
552       nz = bdiag[i] - bdiag[i + 1] - 1;
553       for (j = 0; j < nz; j++) {
554         pv[j] = rtmp[pj[j]];
555         rs += PetscAbsScalar(pv[j]);
556       }
557 
558       sctx.rs = rs;
559       sctx.pv = rtmp[i];
560       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
561       if (sctx.newshift) break; /* break for-loop */
562       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
563 
564       /* Mark diagonal and invert diagonal for simpler triangular solves */
565       pv  = b->a + bdiag[i];
566       *pv = 1.0 / rtmp[i];
567 
568     } /* endof for (i=0; i<n; i++) { */
569 
570     /* MatPivotRefine() */
571     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
572       /*
573        * if no shift in this attempt & shifting & started shifting & can refine,
574        * then try lower shift
575        */
576       sctx.shift_hi       = sctx.shift_fraction;
577       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
578       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
579       sctx.newshift       = PETSC_TRUE;
580       sctx.nshift++;
581     }
582   } while (sctx.newshift);
583 
584   PetscCall(PetscFree(rtmp));
585   PetscCall(ISRestoreIndices(isicol, &ic));
586   PetscCall(ISRestoreIndices(isrow, &r));
587 
588   PetscCall(ISIdentity(isrow, &row_identity));
589   PetscCall(ISIdentity(isicol, &col_identity));
590   if (b->inode.size) {
591     C->ops->solve = MatSolve_SeqAIJ_Inode;
592   } else if (row_identity && col_identity) {
593     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
594   } else {
595     C->ops->solve = MatSolve_SeqAIJ;
596   }
597   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
598   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
599   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
600   C->ops->matsolve          = MatMatSolve_SeqAIJ;
601   C->assembled              = PETSC_TRUE;
602   C->preallocated           = PETSC_TRUE;
603 
604   PetscCall(PetscLogFlops(C->cmap->n));
605 
606   /* MatShiftView(A,info,&sctx) */
607   if (sctx.nshift) {
608     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
609       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
610     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
611       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
612     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
613       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
614     }
615   }
616   PetscFunctionReturn(PETSC_SUCCESS);
617 }
618 
619 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
620 {
621   Mat              C = B;
622   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
623   IS               isrow = b->row, isicol = b->icol;
624   const PetscInt  *r, *ic, *ics;
625   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
626   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
627   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
628   MatScalar       *pv, *rtmp, *pc, multiplier, d;
629   const MatScalar *v, *aa = a->a;
630   PetscReal        rs = 0.0;
631   FactorShiftCtx   sctx;
632   const PetscInt  *ddiag;
633   PetscBool        row_identity, col_identity;
634 
635   PetscFunctionBegin;
636   /* MatPivotSetUp(): initialize shift context sctx */
637   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
638 
639   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
640     ddiag          = a->diag;
641     sctx.shift_top = info->zeropivot;
642     for (i = 0; i < n; i++) {
643       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
644       d  = (aa)[ddiag[i]];
645       rs = -PetscAbsScalar(d) - PetscRealPart(d);
646       v  = aa + ai[i];
647       nz = ai[i + 1] - ai[i];
648       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
649       if (rs > sctx.shift_top) sctx.shift_top = rs;
650     }
651     sctx.shift_top *= 1.1;
652     sctx.nshift_max = 5;
653     sctx.shift_lo   = 0.;
654     sctx.shift_hi   = 1.;
655   }
656 
657   PetscCall(ISGetIndices(isrow, &r));
658   PetscCall(ISGetIndices(isicol, &ic));
659   PetscCall(PetscMalloc1(n + 1, &rtmp));
660   ics = ic;
661 
662   do {
663     sctx.newshift = PETSC_FALSE;
664     for (i = 0; i < n; i++) {
665       nz    = bi[i + 1] - bi[i];
666       bjtmp = bj + bi[i];
667       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
668 
669       /* load in initial (unfactored row) */
670       nz    = ai[r[i] + 1] - ai[r[i]];
671       ajtmp = aj + ai[r[i]];
672       v     = aa + ai[r[i]];
673       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
674       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
675 
676       row = *bjtmp++;
677       while (row < i) {
678         pc = rtmp + row;
679         if (*pc != 0.0) {
680           pv         = b->a + diag_offset[row];
681           pj         = b->j + diag_offset[row] + 1;
682           multiplier = *pc / *pv++;
683           *pc        = multiplier;
684           nz         = bi[row + 1] - diag_offset[row] - 1;
685           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
686           PetscCall(PetscLogFlops(1 + 2.0 * nz));
687         }
688         row = *bjtmp++;
689       }
690       /* finished row so stick it into b->a */
691       pv   = b->a + bi[i];
692       pj   = b->j + bi[i];
693       nz   = bi[i + 1] - bi[i];
694       diag = diag_offset[i] - bi[i];
695       rs   = 0.0;
696       for (j = 0; j < nz; j++) {
697         pv[j] = rtmp[pj[j]];
698         rs += PetscAbsScalar(pv[j]);
699       }
700       rs -= PetscAbsScalar(pv[diag]);
701 
702       sctx.rs = rs;
703       sctx.pv = pv[diag];
704       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
705       if (sctx.newshift) break;
706       pv[diag] = sctx.pv;
707     }
708 
709     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
710       /*
711        * if no shift in this attempt & shifting & started shifting & can refine,
712        * then try lower shift
713        */
714       sctx.shift_hi       = sctx.shift_fraction;
715       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
716       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
717       sctx.newshift       = PETSC_TRUE;
718       sctx.nshift++;
719     }
720   } while (sctx.newshift);
721 
722   /* invert diagonal entries for simpler triangular solves */
723   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
724   PetscCall(PetscFree(rtmp));
725   PetscCall(ISRestoreIndices(isicol, &ic));
726   PetscCall(ISRestoreIndices(isrow, &r));
727 
728   PetscCall(ISIdentity(isrow, &row_identity));
729   PetscCall(ISIdentity(isicol, &col_identity));
730   if (row_identity && col_identity) {
731     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
732   } else {
733     C->ops->solve = MatSolve_SeqAIJ_inplace;
734   }
735   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
736   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
737   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
738   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
739 
740   C->assembled    = PETSC_TRUE;
741   C->preallocated = PETSC_TRUE;
742 
743   PetscCall(PetscLogFlops(C->cmap->n));
744   if (sctx.nshift) {
745     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
746       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
747     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
748       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
749     }
750   }
751   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
752   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
753 
754   PetscCall(MatSeqAIJCheckInode(C));
755   PetscFunctionReturn(PETSC_SUCCESS);
756 }
757 
758 /*
759    This routine implements inplace ILU(0) with row or/and column permutations.
760    Input:
761      A - original matrix
762    Output;
763      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
764          a->j (col index) is permuted by the inverse of colperm, then sorted
765          a->a reordered accordingly with a->j
766          a->diag (ptr to diagonal elements) is updated.
767 */
768 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
769 {
770   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
771   IS               isrow = a->row, isicol = a->icol;
772   const PetscInt  *r, *ic, *ics;
773   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
774   PetscInt        *ajtmp, nz, row;
775   PetscInt        *diag = a->diag, nbdiag, *pj;
776   PetscScalar     *rtmp, *pc, multiplier, d;
777   MatScalar       *pv, *v;
778   PetscReal        rs;
779   FactorShiftCtx   sctx;
780   const MatScalar *aa = a->a, *vtmp;
781 
782   PetscFunctionBegin;
783   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
784 
785   /* MatPivotSetUp(): initialize shift context sctx */
786   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
787 
788   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
789     const PetscInt *ddiag = a->diag;
790     sctx.shift_top        = info->zeropivot;
791     for (i = 0; i < n; i++) {
792       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
793       d    = (aa)[ddiag[i]];
794       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
795       vtmp = aa + ai[i];
796       nz   = ai[i + 1] - ai[i];
797       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
798       if (rs > sctx.shift_top) sctx.shift_top = rs;
799     }
800     sctx.shift_top *= 1.1;
801     sctx.nshift_max = 5;
802     sctx.shift_lo   = 0.;
803     sctx.shift_hi   = 1.;
804   }
805 
806   PetscCall(ISGetIndices(isrow, &r));
807   PetscCall(ISGetIndices(isicol, &ic));
808   PetscCall(PetscMalloc1(n + 1, &rtmp));
809   PetscCall(PetscArrayzero(rtmp, n + 1));
810   ics = ic;
811 
812 #if defined(MV)
813   sctx.shift_top      = 0.;
814   sctx.nshift_max     = 0;
815   sctx.shift_lo       = 0.;
816   sctx.shift_hi       = 0.;
817   sctx.shift_fraction = 0.;
818 
819   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
820     sctx.shift_top = 0.;
821     for (i = 0; i < n; i++) {
822       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
823       d  = (a->a)[diag[i]];
824       rs = -PetscAbsScalar(d) - PetscRealPart(d);
825       v  = a->a + ai[i];
826       nz = ai[i + 1] - ai[i];
827       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
828       if (rs > sctx.shift_top) sctx.shift_top = rs;
829     }
830     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
831     sctx.shift_top *= 1.1;
832     sctx.nshift_max = 5;
833     sctx.shift_lo   = 0.;
834     sctx.shift_hi   = 1.;
835   }
836 
837   sctx.shift_amount = 0.;
838   sctx.nshift       = 0;
839 #endif
840 
841   do {
842     sctx.newshift = PETSC_FALSE;
843     for (i = 0; i < n; i++) {
844       /* load in initial unfactored row */
845       nz    = ai[r[i] + 1] - ai[r[i]];
846       ajtmp = aj + ai[r[i]];
847       v     = a->a + ai[r[i]];
848       /* sort permuted ajtmp and values v accordingly */
849       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
850       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
851 
852       diag[r[i]] = ai[r[i]];
853       for (j = 0; j < nz; j++) {
854         rtmp[ajtmp[j]] = v[j];
855         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
856       }
857       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
858 
859       row = *ajtmp++;
860       while (row < i) {
861         pc = rtmp + row;
862         if (*pc != 0.0) {
863           pv = a->a + diag[r[row]];
864           pj = aj + diag[r[row]] + 1;
865 
866           multiplier = *pc / *pv++;
867           *pc        = multiplier;
868           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
869           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
870           PetscCall(PetscLogFlops(1 + 2.0 * nz));
871         }
872         row = *ajtmp++;
873       }
874       /* finished row so overwrite it onto a->a */
875       pv     = a->a + ai[r[i]];
876       pj     = aj + ai[r[i]];
877       nz     = ai[r[i] + 1] - ai[r[i]];
878       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
879 
880       rs = 0.0;
881       for (j = 0; j < nz; j++) {
882         pv[j] = rtmp[pj[j]];
883         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
884       }
885 
886       sctx.rs = rs;
887       sctx.pv = pv[nbdiag];
888       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
889       if (sctx.newshift) break;
890       pv[nbdiag] = sctx.pv;
891     }
892 
893     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
894       /*
895        * if no shift in this attempt & shifting & started shifting & can refine,
896        * then try lower shift
897        */
898       sctx.shift_hi       = sctx.shift_fraction;
899       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
900       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
901       sctx.newshift       = PETSC_TRUE;
902       sctx.nshift++;
903     }
904   } while (sctx.newshift);
905 
906   /* invert diagonal entries for simpler triangular solves */
907   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
908 
909   PetscCall(PetscFree(rtmp));
910   PetscCall(ISRestoreIndices(isicol, &ic));
911   PetscCall(ISRestoreIndices(isrow, &r));
912 
913   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
914   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
915   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
916   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
917 
918   A->assembled    = PETSC_TRUE;
919   A->preallocated = PETSC_TRUE;
920 
921   PetscCall(PetscLogFlops(A->cmap->n));
922   if (sctx.nshift) {
923     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
924       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
925     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
926       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
927     }
928   }
929   PetscFunctionReturn(PETSC_SUCCESS);
930 }
931 
932 /* ----------------------------------------------------------- */
933 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
934 {
935   Mat C;
936 
937   PetscFunctionBegin;
938   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
939   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
940   PetscCall(MatLUFactorNumeric(C, A, info));
941 
942   A->ops->solve          = C->ops->solve;
943   A->ops->solvetranspose = C->ops->solvetranspose;
944 
945   PetscCall(MatHeaderMerge(A, &C));
946   PetscFunctionReturn(PETSC_SUCCESS);
947 }
948 /* ----------------------------------------------------------- */
949 
950 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
951 {
952   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
953   IS                 iscol = a->col, isrow = a->row;
954   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
955   PetscInt           nz;
956   const PetscInt    *rout, *cout, *r, *c;
957   PetscScalar       *x, *tmp, *tmps, sum;
958   const PetscScalar *b;
959   const MatScalar   *aa = a->a, *v;
960 
961   PetscFunctionBegin;
962   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
963 
964   PetscCall(VecGetArrayRead(bb, &b));
965   PetscCall(VecGetArrayWrite(xx, &x));
966   tmp = a->solve_work;
967 
968   PetscCall(ISGetIndices(isrow, &rout));
969   r = rout;
970   PetscCall(ISGetIndices(iscol, &cout));
971   c = cout + (n - 1);
972 
973   /* forward solve the lower triangular */
974   tmp[0] = b[*r++];
975   tmps   = tmp;
976   for (i = 1; i < n; i++) {
977     v   = aa + ai[i];
978     vi  = aj + ai[i];
979     nz  = a->diag[i] - ai[i];
980     sum = b[*r++];
981     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
982     tmp[i] = sum;
983   }
984 
985   /* backward solve the upper triangular */
986   for (i = n - 1; i >= 0; i--) {
987     v   = aa + a->diag[i] + 1;
988     vi  = aj + a->diag[i] + 1;
989     nz  = ai[i + 1] - a->diag[i] - 1;
990     sum = tmp[i];
991     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
992     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
993   }
994 
995   PetscCall(ISRestoreIndices(isrow, &rout));
996   PetscCall(ISRestoreIndices(iscol, &cout));
997   PetscCall(VecRestoreArrayRead(bb, &b));
998   PetscCall(VecRestoreArrayWrite(xx, &x));
999   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1000   PetscFunctionReturn(PETSC_SUCCESS);
1001 }
1002 
1003 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1004 {
1005   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1006   IS                 iscol = a->col, isrow = a->row;
1007   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1008   PetscInt           nz, neq, ldb, ldx;
1009   const PetscInt    *rout, *cout, *r, *c;
1010   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1011   const PetscScalar *b, *aa  = a->a, *v;
1012   PetscBool          isdense;
1013 
1014   PetscFunctionBegin;
1015   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1016   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1017   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1018   if (X != B) {
1019     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1020     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1021   }
1022   PetscCall(MatDenseGetArrayRead(B, &b));
1023   PetscCall(MatDenseGetLDA(B, &ldb));
1024   PetscCall(MatDenseGetArray(X, &x));
1025   PetscCall(MatDenseGetLDA(X, &ldx));
1026   PetscCall(ISGetIndices(isrow, &rout));
1027   r = rout;
1028   PetscCall(ISGetIndices(iscol, &cout));
1029   c = cout;
1030   for (neq = 0; neq < B->cmap->n; neq++) {
1031     /* forward solve the lower triangular */
1032     tmp[0] = b[r[0]];
1033     tmps   = tmp;
1034     for (i = 1; i < n; i++) {
1035       v   = aa + ai[i];
1036       vi  = aj + ai[i];
1037       nz  = a->diag[i] - ai[i];
1038       sum = b[r[i]];
1039       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1040       tmp[i] = sum;
1041     }
1042     /* backward solve the upper triangular */
1043     for (i = n - 1; i >= 0; i--) {
1044       v   = aa + a->diag[i] + 1;
1045       vi  = aj + a->diag[i] + 1;
1046       nz  = ai[i + 1] - a->diag[i] - 1;
1047       sum = tmp[i];
1048       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1049       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1050     }
1051     b += ldb;
1052     x += ldx;
1053   }
1054   PetscCall(ISRestoreIndices(isrow, &rout));
1055   PetscCall(ISRestoreIndices(iscol, &cout));
1056   PetscCall(MatDenseRestoreArrayRead(B, &b));
1057   PetscCall(MatDenseRestoreArray(X, &x));
1058   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1059   PetscFunctionReturn(PETSC_SUCCESS);
1060 }
1061 
1062 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1063 {
1064   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1065   IS                 iscol = a->col, isrow = a->row;
1066   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1067   PetscInt           nz, neq, ldb, ldx;
1068   const PetscInt    *rout, *cout, *r, *c;
1069   PetscScalar       *x, *tmp = a->solve_work, sum;
1070   const PetscScalar *b, *aa  = a->a, *v;
1071   PetscBool          isdense;
1072 
1073   PetscFunctionBegin;
1074   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1075   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1076   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1077   if (X != B) {
1078     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1079     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1080   }
1081   PetscCall(MatDenseGetArrayRead(B, &b));
1082   PetscCall(MatDenseGetLDA(B, &ldb));
1083   PetscCall(MatDenseGetArray(X, &x));
1084   PetscCall(MatDenseGetLDA(X, &ldx));
1085   PetscCall(ISGetIndices(isrow, &rout));
1086   r = rout;
1087   PetscCall(ISGetIndices(iscol, &cout));
1088   c = cout;
1089   for (neq = 0; neq < B->cmap->n; neq++) {
1090     /* forward solve the lower triangular */
1091     tmp[0] = b[r[0]];
1092     v      = aa;
1093     vi     = aj;
1094     for (i = 1; i < n; i++) {
1095       nz  = ai[i + 1] - ai[i];
1096       sum = b[r[i]];
1097       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1098       tmp[i] = sum;
1099       v += nz;
1100       vi += nz;
1101     }
1102     /* backward solve the upper triangular */
1103     for (i = n - 1; i >= 0; i--) {
1104       v   = aa + adiag[i + 1] + 1;
1105       vi  = aj + adiag[i + 1] + 1;
1106       nz  = adiag[i] - adiag[i + 1] - 1;
1107       sum = tmp[i];
1108       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1109       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1110     }
1111     b += ldb;
1112     x += ldx;
1113   }
1114   PetscCall(ISRestoreIndices(isrow, &rout));
1115   PetscCall(ISRestoreIndices(iscol, &cout));
1116   PetscCall(MatDenseRestoreArrayRead(B, &b));
1117   PetscCall(MatDenseRestoreArray(X, &x));
1118   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1119   PetscFunctionReturn(PETSC_SUCCESS);
1120 }
1121 
1122 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1123 {
1124   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1125   IS                 iscol = a->col, isrow = a->row;
1126   const PetscInt    *r, *c, *rout, *cout;
1127   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1128   PetscInt           nz, row;
1129   PetscScalar       *x, *tmp, *tmps, sum;
1130   const PetscScalar *b;
1131   const MatScalar   *aa = a->a, *v;
1132 
1133   PetscFunctionBegin;
1134   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1135 
1136   PetscCall(VecGetArrayRead(bb, &b));
1137   PetscCall(VecGetArrayWrite(xx, &x));
1138   tmp = a->solve_work;
1139 
1140   PetscCall(ISGetIndices(isrow, &rout));
1141   r = rout;
1142   PetscCall(ISGetIndices(iscol, &cout));
1143   c = cout + (n - 1);
1144 
1145   /* forward solve the lower triangular */
1146   tmp[0] = b[*r++];
1147   tmps   = tmp;
1148   for (row = 1; row < n; row++) {
1149     i   = rout[row]; /* permuted row */
1150     v   = aa + ai[i];
1151     vi  = aj + ai[i];
1152     nz  = a->diag[i] - ai[i];
1153     sum = b[*r++];
1154     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1155     tmp[row] = sum;
1156   }
1157 
1158   /* backward solve the upper triangular */
1159   for (row = n - 1; row >= 0; row--) {
1160     i   = rout[row]; /* permuted row */
1161     v   = aa + a->diag[i] + 1;
1162     vi  = aj + a->diag[i] + 1;
1163     nz  = ai[i + 1] - a->diag[i] - 1;
1164     sum = tmp[row];
1165     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1166     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1167   }
1168 
1169   PetscCall(ISRestoreIndices(isrow, &rout));
1170   PetscCall(ISRestoreIndices(iscol, &cout));
1171   PetscCall(VecRestoreArrayRead(bb, &b));
1172   PetscCall(VecRestoreArrayWrite(xx, &x));
1173   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 /* ----------------------------------------------------------- */
1178 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1179 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1180 {
1181   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1182   PetscInt           n  = A->rmap->n;
1183   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1184   PetscScalar       *x;
1185   const PetscScalar *b;
1186   const MatScalar   *aa = a->a;
1187 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1188   PetscInt         adiag_i, i, nz, ai_i;
1189   const PetscInt  *vi;
1190   const MatScalar *v;
1191   PetscScalar      sum;
1192 #endif
1193 
1194   PetscFunctionBegin;
1195   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1196 
1197   PetscCall(VecGetArrayRead(bb, &b));
1198   PetscCall(VecGetArrayWrite(xx, &x));
1199 
1200 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1201   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1202 #else
1203   /* forward solve the lower triangular */
1204   x[0] = b[0];
1205   for (i = 1; i < n; i++) {
1206     ai_i = ai[i];
1207     v    = aa + ai_i;
1208     vi   = aj + ai_i;
1209     nz   = adiag[i] - ai_i;
1210     sum  = b[i];
1211     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1212     x[i] = sum;
1213   }
1214 
1215   /* backward solve the upper triangular */
1216   for (i = n - 1; i >= 0; i--) {
1217     adiag_i = adiag[i];
1218     v       = aa + adiag_i + 1;
1219     vi      = aj + adiag_i + 1;
1220     nz      = ai[i + 1] - adiag_i - 1;
1221     sum     = x[i];
1222     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1223     x[i] = sum * aa[adiag_i];
1224   }
1225 #endif
1226   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1227   PetscCall(VecRestoreArrayRead(bb, &b));
1228   PetscCall(VecRestoreArrayWrite(xx, &x));
1229   PetscFunctionReturn(PETSC_SUCCESS);
1230 }
1231 
1232 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1233 {
1234   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1235   IS                 iscol = a->col, isrow = a->row;
1236   PetscInt           i, n                  = A->rmap->n, j;
1237   PetscInt           nz;
1238   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1239   PetscScalar       *x, *tmp, sum;
1240   const PetscScalar *b;
1241   const MatScalar   *aa = a->a, *v;
1242 
1243   PetscFunctionBegin;
1244   if (yy != xx) PetscCall(VecCopy(yy, xx));
1245 
1246   PetscCall(VecGetArrayRead(bb, &b));
1247   PetscCall(VecGetArray(xx, &x));
1248   tmp = a->solve_work;
1249 
1250   PetscCall(ISGetIndices(isrow, &rout));
1251   r = rout;
1252   PetscCall(ISGetIndices(iscol, &cout));
1253   c = cout + (n - 1);
1254 
1255   /* forward solve the lower triangular */
1256   tmp[0] = b[*r++];
1257   for (i = 1; i < n; i++) {
1258     v   = aa + ai[i];
1259     vi  = aj + ai[i];
1260     nz  = a->diag[i] - ai[i];
1261     sum = b[*r++];
1262     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1263     tmp[i] = sum;
1264   }
1265 
1266   /* backward solve the upper triangular */
1267   for (i = n - 1; i >= 0; i--) {
1268     v   = aa + a->diag[i] + 1;
1269     vi  = aj + a->diag[i] + 1;
1270     nz  = ai[i + 1] - a->diag[i] - 1;
1271     sum = tmp[i];
1272     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1273     tmp[i] = sum * aa[a->diag[i]];
1274     x[*c--] += tmp[i];
1275   }
1276 
1277   PetscCall(ISRestoreIndices(isrow, &rout));
1278   PetscCall(ISRestoreIndices(iscol, &cout));
1279   PetscCall(VecRestoreArrayRead(bb, &b));
1280   PetscCall(VecRestoreArray(xx, &x));
1281   PetscCall(PetscLogFlops(2.0 * a->nz));
1282   PetscFunctionReturn(PETSC_SUCCESS);
1283 }
1284 
1285 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1286 {
1287   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1288   IS                 iscol = a->col, isrow = a->row;
1289   PetscInt           i, n                  = A->rmap->n, j;
1290   PetscInt           nz;
1291   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1292   PetscScalar       *x, *tmp, sum;
1293   const PetscScalar *b;
1294   const MatScalar   *aa = a->a, *v;
1295 
1296   PetscFunctionBegin;
1297   if (yy != xx) PetscCall(VecCopy(yy, xx));
1298 
1299   PetscCall(VecGetArrayRead(bb, &b));
1300   PetscCall(VecGetArray(xx, &x));
1301   tmp = a->solve_work;
1302 
1303   PetscCall(ISGetIndices(isrow, &rout));
1304   r = rout;
1305   PetscCall(ISGetIndices(iscol, &cout));
1306   c = cout;
1307 
1308   /* forward solve the lower triangular */
1309   tmp[0] = b[r[0]];
1310   v      = aa;
1311   vi     = aj;
1312   for (i = 1; i < n; i++) {
1313     nz  = ai[i + 1] - ai[i];
1314     sum = b[r[i]];
1315     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1316     tmp[i] = sum;
1317     v += nz;
1318     vi += nz;
1319   }
1320 
1321   /* backward solve the upper triangular */
1322   v  = aa + adiag[n - 1];
1323   vi = aj + adiag[n - 1];
1324   for (i = n - 1; i >= 0; i--) {
1325     nz  = adiag[i] - adiag[i + 1] - 1;
1326     sum = tmp[i];
1327     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1328     tmp[i] = sum * v[nz];
1329     x[c[i]] += tmp[i];
1330     v += nz + 1;
1331     vi += nz + 1;
1332   }
1333 
1334   PetscCall(ISRestoreIndices(isrow, &rout));
1335   PetscCall(ISRestoreIndices(iscol, &cout));
1336   PetscCall(VecRestoreArrayRead(bb, &b));
1337   PetscCall(VecRestoreArray(xx, &x));
1338   PetscCall(PetscLogFlops(2.0 * a->nz));
1339   PetscFunctionReturn(PETSC_SUCCESS);
1340 }
1341 
1342 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1343 {
1344   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1345   IS                 iscol = a->col, isrow = a->row;
1346   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1347   PetscInt           i, n = A->rmap->n, j;
1348   PetscInt           nz;
1349   PetscScalar       *x, *tmp, s1;
1350   const MatScalar   *aa = a->a, *v;
1351   const PetscScalar *b;
1352 
1353   PetscFunctionBegin;
1354   PetscCall(VecGetArrayRead(bb, &b));
1355   PetscCall(VecGetArrayWrite(xx, &x));
1356   tmp = a->solve_work;
1357 
1358   PetscCall(ISGetIndices(isrow, &rout));
1359   r = rout;
1360   PetscCall(ISGetIndices(iscol, &cout));
1361   c = cout;
1362 
1363   /* copy the b into temp work space according to permutation */
1364   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1365 
1366   /* forward solve the U^T */
1367   for (i = 0; i < n; i++) {
1368     v  = aa + diag[i];
1369     vi = aj + diag[i] + 1;
1370     nz = ai[i + 1] - diag[i] - 1;
1371     s1 = tmp[i];
1372     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1373     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1374     tmp[i] = s1;
1375   }
1376 
1377   /* backward solve the L^T */
1378   for (i = n - 1; i >= 0; i--) {
1379     v  = aa + diag[i] - 1;
1380     vi = aj + diag[i] - 1;
1381     nz = diag[i] - ai[i];
1382     s1 = tmp[i];
1383     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1384   }
1385 
1386   /* copy tmp into x according to permutation */
1387   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1388 
1389   PetscCall(ISRestoreIndices(isrow, &rout));
1390   PetscCall(ISRestoreIndices(iscol, &cout));
1391   PetscCall(VecRestoreArrayRead(bb, &b));
1392   PetscCall(VecRestoreArrayWrite(xx, &x));
1393 
1394   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1395   PetscFunctionReturn(PETSC_SUCCESS);
1396 }
1397 
1398 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1399 {
1400   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1401   IS                 iscol = a->col, isrow = a->row;
1402   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1403   PetscInt           i, n = A->rmap->n, j;
1404   PetscInt           nz;
1405   PetscScalar       *x, *tmp, s1;
1406   const MatScalar   *aa = a->a, *v;
1407   const PetscScalar *b;
1408 
1409   PetscFunctionBegin;
1410   PetscCall(VecGetArrayRead(bb, &b));
1411   PetscCall(VecGetArrayWrite(xx, &x));
1412   tmp = a->solve_work;
1413 
1414   PetscCall(ISGetIndices(isrow, &rout));
1415   r = rout;
1416   PetscCall(ISGetIndices(iscol, &cout));
1417   c = cout;
1418 
1419   /* copy the b into temp work space according to permutation */
1420   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1421 
1422   /* forward solve the U^T */
1423   for (i = 0; i < n; i++) {
1424     v  = aa + adiag[i + 1] + 1;
1425     vi = aj + adiag[i + 1] + 1;
1426     nz = adiag[i] - adiag[i + 1] - 1;
1427     s1 = tmp[i];
1428     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1429     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1430     tmp[i] = s1;
1431   }
1432 
1433   /* backward solve the L^T */
1434   for (i = n - 1; i >= 0; i--) {
1435     v  = aa + ai[i];
1436     vi = aj + ai[i];
1437     nz = ai[i + 1] - ai[i];
1438     s1 = tmp[i];
1439     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1440   }
1441 
1442   /* copy tmp into x according to permutation */
1443   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1444 
1445   PetscCall(ISRestoreIndices(isrow, &rout));
1446   PetscCall(ISRestoreIndices(iscol, &cout));
1447   PetscCall(VecRestoreArrayRead(bb, &b));
1448   PetscCall(VecRestoreArrayWrite(xx, &x));
1449 
1450   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1451   PetscFunctionReturn(PETSC_SUCCESS);
1452 }
1453 
1454 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1455 {
1456   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1457   IS                 iscol = a->col, isrow = a->row;
1458   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1459   PetscInt           i, n = A->rmap->n, j;
1460   PetscInt           nz;
1461   PetscScalar       *x, *tmp, s1;
1462   const MatScalar   *aa = a->a, *v;
1463   const PetscScalar *b;
1464 
1465   PetscFunctionBegin;
1466   if (zz != xx) PetscCall(VecCopy(zz, xx));
1467   PetscCall(VecGetArrayRead(bb, &b));
1468   PetscCall(VecGetArray(xx, &x));
1469   tmp = a->solve_work;
1470 
1471   PetscCall(ISGetIndices(isrow, &rout));
1472   r = rout;
1473   PetscCall(ISGetIndices(iscol, &cout));
1474   c = cout;
1475 
1476   /* copy the b into temp work space according to permutation */
1477   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1478 
1479   /* forward solve the U^T */
1480   for (i = 0; i < n; i++) {
1481     v  = aa + diag[i];
1482     vi = aj + diag[i] + 1;
1483     nz = ai[i + 1] - diag[i] - 1;
1484     s1 = tmp[i];
1485     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1486     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1487     tmp[i] = s1;
1488   }
1489 
1490   /* backward solve the L^T */
1491   for (i = n - 1; i >= 0; i--) {
1492     v  = aa + diag[i] - 1;
1493     vi = aj + diag[i] - 1;
1494     nz = diag[i] - ai[i];
1495     s1 = tmp[i];
1496     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1497   }
1498 
1499   /* copy tmp into x according to permutation */
1500   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1501 
1502   PetscCall(ISRestoreIndices(isrow, &rout));
1503   PetscCall(ISRestoreIndices(iscol, &cout));
1504   PetscCall(VecRestoreArrayRead(bb, &b));
1505   PetscCall(VecRestoreArray(xx, &x));
1506 
1507   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1508   PetscFunctionReturn(PETSC_SUCCESS);
1509 }
1510 
1511 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1512 {
1513   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1514   IS                 iscol = a->col, isrow = a->row;
1515   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1516   PetscInt           i, n = A->rmap->n, j;
1517   PetscInt           nz;
1518   PetscScalar       *x, *tmp, s1;
1519   const MatScalar   *aa = a->a, *v;
1520   const PetscScalar *b;
1521 
1522   PetscFunctionBegin;
1523   if (zz != xx) PetscCall(VecCopy(zz, xx));
1524   PetscCall(VecGetArrayRead(bb, &b));
1525   PetscCall(VecGetArray(xx, &x));
1526   tmp = a->solve_work;
1527 
1528   PetscCall(ISGetIndices(isrow, &rout));
1529   r = rout;
1530   PetscCall(ISGetIndices(iscol, &cout));
1531   c = cout;
1532 
1533   /* copy the b into temp work space according to permutation */
1534   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1535 
1536   /* forward solve the U^T */
1537   for (i = 0; i < n; i++) {
1538     v  = aa + adiag[i + 1] + 1;
1539     vi = aj + adiag[i + 1] + 1;
1540     nz = adiag[i] - adiag[i + 1] - 1;
1541     s1 = tmp[i];
1542     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1543     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1544     tmp[i] = s1;
1545   }
1546 
1547   /* backward solve the L^T */
1548   for (i = n - 1; i >= 0; i--) {
1549     v  = aa + ai[i];
1550     vi = aj + ai[i];
1551     nz = ai[i + 1] - ai[i];
1552     s1 = tmp[i];
1553     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1554   }
1555 
1556   /* copy tmp into x according to permutation */
1557   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1558 
1559   PetscCall(ISRestoreIndices(isrow, &rout));
1560   PetscCall(ISRestoreIndices(iscol, &cout));
1561   PetscCall(VecRestoreArrayRead(bb, &b));
1562   PetscCall(VecRestoreArray(xx, &x));
1563 
1564   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1565   PetscFunctionReturn(PETSC_SUCCESS);
1566 }
1567 
1568 /* ----------------------------------------------------------------*/
1569 
1570 /*
1571    ilu() under revised new data structure.
1572    Factored arrays bj and ba are stored as
1573      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1574 
1575    bi=fact->i is an array of size n+1, in which
1576      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1577      bi[n]:  points to L(n-1,n-1)+1
1578 
1579   bdiag=fact->diag is an array of size n+1,in which
1580      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1581      bdiag[n]: points to entry of U(n-1,0)-1
1582 
1583    U(i,:) contains bdiag[i] as its last entry, i.e.,
1584     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1585 */
1586 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1587 {
1588   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1589   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1590   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1591   IS             isicol;
1592 
1593   PetscFunctionBegin;
1594   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1595   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1596   b = (Mat_SeqAIJ *)(fact)->data;
1597 
1598   /* allocate matrix arrays for new data structure */
1599   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1600 
1601   b->singlemalloc = PETSC_TRUE;
1602   if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); }
1603   bdiag = b->diag;
1604 
1605   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1606 
1607   /* set bi and bj with new data structure */
1608   bi = b->i;
1609   bj = b->j;
1610 
1611   /* L part */
1612   bi[0] = 0;
1613   for (i = 0; i < n; i++) {
1614     nz        = adiag[i] - ai[i];
1615     bi[i + 1] = bi[i] + nz;
1616     aj        = a->j + ai[i];
1617     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1618   }
1619 
1620   /* U part */
1621   bdiag[n] = bi[n] - 1;
1622   for (i = n - 1; i >= 0; i--) {
1623     nz = ai[i + 1] - adiag[i] - 1;
1624     aj = a->j + adiag[i] + 1;
1625     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1626     /* diag[i] */
1627     bj[k++]  = i;
1628     bdiag[i] = bdiag[i + 1] + nz + 1;
1629   }
1630 
1631   fact->factortype             = MAT_FACTOR_ILU;
1632   fact->info.factor_mallocs    = 0;
1633   fact->info.fill_ratio_given  = info->fill;
1634   fact->info.fill_ratio_needed = 1.0;
1635   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1636   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1637 
1638   b       = (Mat_SeqAIJ *)(fact)->data;
1639   b->row  = isrow;
1640   b->col  = iscol;
1641   b->icol = isicol;
1642   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1643   PetscCall(PetscObjectReference((PetscObject)isrow));
1644   PetscCall(PetscObjectReference((PetscObject)iscol));
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1649 {
1650   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1651   IS                 isicol;
1652   const PetscInt    *r, *ic;
1653   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1654   PetscInt          *bi, *cols, nnz, *cols_lvl;
1655   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1656   PetscInt           i, levels, diagonal_fill;
1657   PetscBool          col_identity, row_identity, missing;
1658   PetscReal          f;
1659   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1660   PetscBT            lnkbt;
1661   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1662   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1663   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1664 
1665   PetscFunctionBegin;
1666   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1667   PetscCall(MatMissingDiagonal(A, &missing, &i));
1668   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1669 
1670   levels = (PetscInt)info->levels;
1671   PetscCall(ISIdentity(isrow, &row_identity));
1672   PetscCall(ISIdentity(iscol, &col_identity));
1673   if (!levels && row_identity && col_identity) {
1674     /* special case: ilu(0) with natural ordering */
1675     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1676     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1677     PetscFunctionReturn(PETSC_SUCCESS);
1678   }
1679 
1680   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1681   PetscCall(ISGetIndices(isrow, &r));
1682   PetscCall(ISGetIndices(isicol, &ic));
1683 
1684   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1685   PetscCall(PetscMalloc1(n + 1, &bi));
1686   PetscCall(PetscMalloc1(n + 1, &bdiag));
1687   bi[0] = bdiag[0] = 0;
1688   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1689 
1690   /* create a linked list for storing column indices of the active row */
1691   nlnk = n + 1;
1692   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1693 
1694   /* initial FreeSpace size is f*(ai[n]+1) */
1695   f             = info->fill;
1696   diagonal_fill = (PetscInt)info->diagonal_fill;
1697   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1698   current_space = free_space;
1699   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1700   current_space_lvl = free_space_lvl;
1701   for (i = 0; i < n; i++) {
1702     nzi = 0;
1703     /* copy current row into linked list */
1704     nnz = ai[r[i] + 1] - ai[r[i]];
1705     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1706     cols   = aj + ai[r[i]];
1707     lnk[i] = -1; /* marker to indicate if diagonal exists */
1708     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1709     nzi += nlnk;
1710 
1711     /* make sure diagonal entry is included */
1712     if (diagonal_fill && lnk[i] == -1) {
1713       fm = n;
1714       while (lnk[fm] < i) fm = lnk[fm];
1715       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1716       lnk[fm]    = i;
1717       lnk_lvl[i] = 0;
1718       nzi++;
1719       dcount++;
1720     }
1721 
1722     /* add pivot rows into the active row */
1723     nzbd = 0;
1724     prow = lnk[n];
1725     while (prow < i) {
1726       nnz      = bdiag[prow];
1727       cols     = bj_ptr[prow] + nnz + 1;
1728       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1729       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1730       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1731       nzi += nlnk;
1732       prow = lnk[prow];
1733       nzbd++;
1734     }
1735     bdiag[i]  = nzbd;
1736     bi[i + 1] = bi[i] + nzi;
1737     /* if free space is not available, make more free space */
1738     if (current_space->local_remaining < nzi) {
1739       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1740       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1741       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1742       reallocs++;
1743     }
1744 
1745     /* copy data into free_space and free_space_lvl, then initialize lnk */
1746     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1747     bj_ptr[i]    = current_space->array;
1748     bjlvl_ptr[i] = current_space_lvl->array;
1749 
1750     /* make sure the active row i has diagonal entry */
1751     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1752 
1753     current_space->array += nzi;
1754     current_space->local_used += nzi;
1755     current_space->local_remaining -= nzi;
1756     current_space_lvl->array += nzi;
1757     current_space_lvl->local_used += nzi;
1758     current_space_lvl->local_remaining -= nzi;
1759   }
1760 
1761   PetscCall(ISRestoreIndices(isrow, &r));
1762   PetscCall(ISRestoreIndices(isicol, &ic));
1763   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1764   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1765   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1766 
1767   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1768   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1769   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1770 
1771 #if defined(PETSC_USE_INFO)
1772   {
1773     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1774     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1775     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1776     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1777     PetscCall(PetscInfo(A, "for best performance.\n"));
1778     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1779   }
1780 #endif
1781   /* put together the new matrix */
1782   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1783   b = (Mat_SeqAIJ *)(fact)->data;
1784 
1785   b->free_a       = PETSC_TRUE;
1786   b->free_ij      = PETSC_TRUE;
1787   b->singlemalloc = PETSC_FALSE;
1788 
1789   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1790 
1791   b->j    = bj;
1792   b->i    = bi;
1793   b->diag = bdiag;
1794   b->ilen = NULL;
1795   b->imax = NULL;
1796   b->row  = isrow;
1797   b->col  = iscol;
1798   PetscCall(PetscObjectReference((PetscObject)isrow));
1799   PetscCall(PetscObjectReference((PetscObject)iscol));
1800   b->icol = isicol;
1801 
1802   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1803   /* In b structure:  Free imax, ilen, old a, old j.
1804      Allocate bdiag, solve_work, new a, new j */
1805   b->maxnz = b->nz = bdiag[0] + 1;
1806 
1807   (fact)->info.factor_mallocs    = reallocs;
1808   (fact)->info.fill_ratio_given  = f;
1809   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1810   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1811   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1812   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1813   PetscFunctionReturn(PETSC_SUCCESS);
1814 }
1815 
1816 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1817 {
1818   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1819   IS                 isicol;
1820   const PetscInt    *r, *ic;
1821   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1822   PetscInt          *bi, *cols, nnz, *cols_lvl;
1823   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1824   PetscInt           i, levels, diagonal_fill;
1825   PetscBool          col_identity, row_identity;
1826   PetscReal          f;
1827   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1828   PetscBT            lnkbt;
1829   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1830   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1831   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1832   PetscBool          missing;
1833 
1834   PetscFunctionBegin;
1835   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1836   PetscCall(MatMissingDiagonal(A, &missing, &i));
1837   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1838 
1839   f             = info->fill;
1840   levels        = (PetscInt)info->levels;
1841   diagonal_fill = (PetscInt)info->diagonal_fill;
1842 
1843   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1844 
1845   PetscCall(ISIdentity(isrow, &row_identity));
1846   PetscCall(ISIdentity(iscol, &col_identity));
1847   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1848     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1849 
1850     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1851     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1852     fact->factortype               = MAT_FACTOR_ILU;
1853     (fact)->info.factor_mallocs    = 0;
1854     (fact)->info.fill_ratio_given  = info->fill;
1855     (fact)->info.fill_ratio_needed = 1.0;
1856 
1857     b       = (Mat_SeqAIJ *)(fact)->data;
1858     b->row  = isrow;
1859     b->col  = iscol;
1860     b->icol = isicol;
1861     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1862     PetscCall(PetscObjectReference((PetscObject)isrow));
1863     PetscCall(PetscObjectReference((PetscObject)iscol));
1864     PetscFunctionReturn(PETSC_SUCCESS);
1865   }
1866 
1867   PetscCall(ISGetIndices(isrow, &r));
1868   PetscCall(ISGetIndices(isicol, &ic));
1869 
1870   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1871   PetscCall(PetscMalloc1(n + 1, &bi));
1872   PetscCall(PetscMalloc1(n + 1, &bdiag));
1873   bi[0] = bdiag[0] = 0;
1874 
1875   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1876 
1877   /* create a linked list for storing column indices of the active row */
1878   nlnk = n + 1;
1879   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1880 
1881   /* initial FreeSpace size is f*(ai[n]+1) */
1882   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1883   current_space = free_space;
1884   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1885   current_space_lvl = free_space_lvl;
1886 
1887   for (i = 0; i < n; i++) {
1888     nzi = 0;
1889     /* copy current row into linked list */
1890     nnz = ai[r[i] + 1] - ai[r[i]];
1891     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1892     cols   = aj + ai[r[i]];
1893     lnk[i] = -1; /* marker to indicate if diagonal exists */
1894     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1895     nzi += nlnk;
1896 
1897     /* make sure diagonal entry is included */
1898     if (diagonal_fill && lnk[i] == -1) {
1899       fm = n;
1900       while (lnk[fm] < i) fm = lnk[fm];
1901       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1902       lnk[fm]    = i;
1903       lnk_lvl[i] = 0;
1904       nzi++;
1905       dcount++;
1906     }
1907 
1908     /* add pivot rows into the active row */
1909     nzbd = 0;
1910     prow = lnk[n];
1911     while (prow < i) {
1912       nnz      = bdiag[prow];
1913       cols     = bj_ptr[prow] + nnz + 1;
1914       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1915       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1916       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1917       nzi += nlnk;
1918       prow = lnk[prow];
1919       nzbd++;
1920     }
1921     bdiag[i]  = nzbd;
1922     bi[i + 1] = bi[i] + nzi;
1923 
1924     /* if free space is not available, make more free space */
1925     if (current_space->local_remaining < nzi) {
1926       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1927       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1928       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1929       reallocs++;
1930     }
1931 
1932     /* copy data into free_space and free_space_lvl, then initialize lnk */
1933     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1934     bj_ptr[i]    = current_space->array;
1935     bjlvl_ptr[i] = current_space_lvl->array;
1936 
1937     /* make sure the active row i has diagonal entry */
1938     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1939 
1940     current_space->array += nzi;
1941     current_space->local_used += nzi;
1942     current_space->local_remaining -= nzi;
1943     current_space_lvl->array += nzi;
1944     current_space_lvl->local_used += nzi;
1945     current_space_lvl->local_remaining -= nzi;
1946   }
1947 
1948   PetscCall(ISRestoreIndices(isrow, &r));
1949   PetscCall(ISRestoreIndices(isicol, &ic));
1950 
1951   /* destroy list of free space and other temporary arrays */
1952   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1953   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1954   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1955   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1956   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1957 
1958 #if defined(PETSC_USE_INFO)
1959   {
1960     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1961     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1962     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1963     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1964     PetscCall(PetscInfo(A, "for best performance.\n"));
1965     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1966   }
1967 #endif
1968 
1969   /* put together the new matrix */
1970   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1971   b = (Mat_SeqAIJ *)(fact)->data;
1972 
1973   b->free_a       = PETSC_TRUE;
1974   b->free_ij      = PETSC_TRUE;
1975   b->singlemalloc = PETSC_FALSE;
1976 
1977   PetscCall(PetscMalloc1(bi[n], &b->a));
1978   b->j = bj;
1979   b->i = bi;
1980   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1981   b->diag = bdiag;
1982   b->ilen = NULL;
1983   b->imax = NULL;
1984   b->row  = isrow;
1985   b->col  = iscol;
1986   PetscCall(PetscObjectReference((PetscObject)isrow));
1987   PetscCall(PetscObjectReference((PetscObject)iscol));
1988   b->icol = isicol;
1989   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1990   /* In b structure:  Free imax, ilen, old a, old j.
1991      Allocate bdiag, solve_work, new a, new j */
1992   b->maxnz = b->nz = bi[n];
1993 
1994   (fact)->info.factor_mallocs    = reallocs;
1995   (fact)->info.fill_ratio_given  = f;
1996   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1997   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
1998   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1999   PetscFunctionReturn(PETSC_SUCCESS);
2000 }
2001 
2002 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2003 {
2004   Mat             C  = B;
2005   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2006   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2007   IS              ip = b->row, iip = b->icol;
2008   const PetscInt *rip, *riip;
2009   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2010   PetscInt       *ai = a->i, *aj = a->j;
2011   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2012   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2013   PetscBool       perm_identity;
2014   FactorShiftCtx  sctx;
2015   PetscReal       rs;
2016   MatScalar       d, *v;
2017 
2018   PetscFunctionBegin;
2019   /* MatPivotSetUp(): initialize shift context sctx */
2020   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2021 
2022   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2023     sctx.shift_top = info->zeropivot;
2024     for (i = 0; i < mbs; i++) {
2025       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2026       d  = (aa)[a->diag[i]];
2027       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2028       v  = aa + ai[i];
2029       nz = ai[i + 1] - ai[i];
2030       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2031       if (rs > sctx.shift_top) sctx.shift_top = rs;
2032     }
2033     sctx.shift_top *= 1.1;
2034     sctx.nshift_max = 5;
2035     sctx.shift_lo   = 0.;
2036     sctx.shift_hi   = 1.;
2037   }
2038 
2039   PetscCall(ISGetIndices(ip, &rip));
2040   PetscCall(ISGetIndices(iip, &riip));
2041 
2042   /* allocate working arrays
2043      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2044      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2045   */
2046   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2047 
2048   do {
2049     sctx.newshift = PETSC_FALSE;
2050 
2051     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2052     if (mbs) il[0] = 0;
2053 
2054     for (k = 0; k < mbs; k++) {
2055       /* zero rtmp */
2056       nz    = bi[k + 1] - bi[k];
2057       bjtmp = bj + bi[k];
2058       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2059 
2060       /* load in initial unfactored row */
2061       bval = ba + bi[k];
2062       jmin = ai[rip[k]];
2063       jmax = ai[rip[k] + 1];
2064       for (j = jmin; j < jmax; j++) {
2065         col = riip[aj[j]];
2066         if (col >= k) { /* only take upper triangular entry */
2067           rtmp[col] = aa[j];
2068           *bval++   = 0.0; /* for in-place factorization */
2069         }
2070       }
2071       /* shift the diagonal of the matrix: ZeropivotApply() */
2072       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2073 
2074       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2075       dk = rtmp[k];
2076       i  = c2r[k]; /* first row to be added to k_th row  */
2077 
2078       while (i < k) {
2079         nexti = c2r[i]; /* next row to be added to k_th row */
2080 
2081         /* compute multiplier, update diag(k) and U(i,k) */
2082         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2083         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2084         dk += uikdi * ba[ili];           /* update diag[k] */
2085         ba[ili] = uikdi;                 /* -U(i,k) */
2086 
2087         /* add multiple of row i to k-th row */
2088         jmin = ili + 1;
2089         jmax = bi[i + 1];
2090         if (jmin < jmax) {
2091           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2092           /* update il and c2r for row i */
2093           il[i]  = jmin;
2094           j      = bj[jmin];
2095           c2r[i] = c2r[j];
2096           c2r[j] = i;
2097         }
2098         i = nexti;
2099       }
2100 
2101       /* copy data into U(k,:) */
2102       rs   = 0.0;
2103       jmin = bi[k];
2104       jmax = bi[k + 1] - 1;
2105       if (jmin < jmax) {
2106         for (j = jmin; j < jmax; j++) {
2107           col   = bj[j];
2108           ba[j] = rtmp[col];
2109           rs += PetscAbsScalar(ba[j]);
2110         }
2111         /* add the k-th row into il and c2r */
2112         il[k]  = jmin;
2113         i      = bj[jmin];
2114         c2r[k] = c2r[i];
2115         c2r[i] = k;
2116       }
2117 
2118       /* MatPivotCheck() */
2119       sctx.rs = rs;
2120       sctx.pv = dk;
2121       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2122       if (sctx.newshift) break;
2123       dk = sctx.pv;
2124 
2125       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2126     }
2127   } while (sctx.newshift);
2128 
2129   PetscCall(PetscFree3(rtmp, il, c2r));
2130   PetscCall(ISRestoreIndices(ip, &rip));
2131   PetscCall(ISRestoreIndices(iip, &riip));
2132 
2133   PetscCall(ISIdentity(ip, &perm_identity));
2134   if (perm_identity) {
2135     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2136     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2137     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2138     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2139   } else {
2140     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2141     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2142     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2143     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2144   }
2145 
2146   C->assembled    = PETSC_TRUE;
2147   C->preallocated = PETSC_TRUE;
2148 
2149   PetscCall(PetscLogFlops(C->rmap->n));
2150 
2151   /* MatPivotView() */
2152   if (sctx.nshift) {
2153     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2154       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2155     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2156       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2157     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2158       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2159     }
2160   }
2161   PetscFunctionReturn(PETSC_SUCCESS);
2162 }
2163 
2164 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2165 {
2166   Mat             C  = B;
2167   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2168   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2169   IS              ip = b->row, iip = b->icol;
2170   const PetscInt *rip, *riip;
2171   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2172   PetscInt       *ai = a->i, *aj = a->j;
2173   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2174   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2175   PetscBool       perm_identity;
2176   FactorShiftCtx  sctx;
2177   PetscReal       rs;
2178   MatScalar       d, *v;
2179 
2180   PetscFunctionBegin;
2181   /* MatPivotSetUp(): initialize shift context sctx */
2182   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2183 
2184   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2185     sctx.shift_top = info->zeropivot;
2186     for (i = 0; i < mbs; i++) {
2187       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2188       d  = (aa)[a->diag[i]];
2189       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2190       v  = aa + ai[i];
2191       nz = ai[i + 1] - ai[i];
2192       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2193       if (rs > sctx.shift_top) sctx.shift_top = rs;
2194     }
2195     sctx.shift_top *= 1.1;
2196     sctx.nshift_max = 5;
2197     sctx.shift_lo   = 0.;
2198     sctx.shift_hi   = 1.;
2199   }
2200 
2201   PetscCall(ISGetIndices(ip, &rip));
2202   PetscCall(ISGetIndices(iip, &riip));
2203 
2204   /* initialization */
2205   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2206 
2207   do {
2208     sctx.newshift = PETSC_FALSE;
2209 
2210     for (i = 0; i < mbs; i++) jl[i] = mbs;
2211     il[0] = 0;
2212 
2213     for (k = 0; k < mbs; k++) {
2214       /* zero rtmp */
2215       nz    = bi[k + 1] - bi[k];
2216       bjtmp = bj + bi[k];
2217       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2218 
2219       bval = ba + bi[k];
2220       /* initialize k-th row by the perm[k]-th row of A */
2221       jmin = ai[rip[k]];
2222       jmax = ai[rip[k] + 1];
2223       for (j = jmin; j < jmax; j++) {
2224         col = riip[aj[j]];
2225         if (col >= k) { /* only take upper triangular entry */
2226           rtmp[col] = aa[j];
2227           *bval++   = 0.0; /* for in-place factorization */
2228         }
2229       }
2230       /* shift the diagonal of the matrix */
2231       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2232 
2233       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2234       dk = rtmp[k];
2235       i  = jl[k]; /* first row to be added to k_th row  */
2236 
2237       while (i < k) {
2238         nexti = jl[i]; /* next row to be added to k_th row */
2239 
2240         /* compute multiplier, update diag(k) and U(i,k) */
2241         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2242         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2243         dk += uikdi * ba[ili];
2244         ba[ili] = uikdi; /* -U(i,k) */
2245 
2246         /* add multiple of row i to k-th row */
2247         jmin = ili + 1;
2248         jmax = bi[i + 1];
2249         if (jmin < jmax) {
2250           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2251           /* update il and jl for row i */
2252           il[i] = jmin;
2253           j     = bj[jmin];
2254           jl[i] = jl[j];
2255           jl[j] = i;
2256         }
2257         i = nexti;
2258       }
2259 
2260       /* shift the diagonals when zero pivot is detected */
2261       /* compute rs=sum of abs(off-diagonal) */
2262       rs   = 0.0;
2263       jmin = bi[k] + 1;
2264       nz   = bi[k + 1] - jmin;
2265       bcol = bj + jmin;
2266       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2267 
2268       sctx.rs = rs;
2269       sctx.pv = dk;
2270       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2271       if (sctx.newshift) break;
2272       dk = sctx.pv;
2273 
2274       /* copy data into U(k,:) */
2275       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2276       jmin      = bi[k] + 1;
2277       jmax      = bi[k + 1];
2278       if (jmin < jmax) {
2279         for (j = jmin; j < jmax; j++) {
2280           col   = bj[j];
2281           ba[j] = rtmp[col];
2282         }
2283         /* add the k-th row into il and jl */
2284         il[k] = jmin;
2285         i     = bj[jmin];
2286         jl[k] = jl[i];
2287         jl[i] = k;
2288       }
2289     }
2290   } while (sctx.newshift);
2291 
2292   PetscCall(PetscFree3(rtmp, il, jl));
2293   PetscCall(ISRestoreIndices(ip, &rip));
2294   PetscCall(ISRestoreIndices(iip, &riip));
2295 
2296   PetscCall(ISIdentity(ip, &perm_identity));
2297   if (perm_identity) {
2298     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2299     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2300     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2301     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2302   } else {
2303     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2304     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2305     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2306     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2307   }
2308 
2309   C->assembled    = PETSC_TRUE;
2310   C->preallocated = PETSC_TRUE;
2311 
2312   PetscCall(PetscLogFlops(C->rmap->n));
2313   if (sctx.nshift) {
2314     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2315       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2316     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2317       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2318     }
2319   }
2320   PetscFunctionReturn(PETSC_SUCCESS);
2321 }
2322 
2323 /*
2324    icc() under revised new data structure.
2325    Factored arrays bj and ba are stored as
2326      U(0,:),...,U(i,:),U(n-1,:)
2327 
2328    ui=fact->i is an array of size n+1, in which
2329    ui+
2330      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2331      ui[n]:  points to U(n-1,n-1)+1
2332 
2333   udiag=fact->diag is an array of size n,in which
2334      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2335 
2336    U(i,:) contains udiag[i] as its last entry, i.e.,
2337     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2338 */
2339 
2340 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2341 {
2342   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2343   Mat_SeqSBAIJ      *b;
2344   PetscBool          perm_identity, missing;
2345   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2346   const PetscInt    *rip, *riip;
2347   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2348   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2349   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2350   PetscReal          fill = info->fill, levels = info->levels;
2351   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2352   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2353   PetscBT            lnkbt;
2354   IS                 iperm;
2355 
2356   PetscFunctionBegin;
2357   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2358   PetscCall(MatMissingDiagonal(A, &missing, &d));
2359   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2360   PetscCall(ISIdentity(perm, &perm_identity));
2361   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2362 
2363   PetscCall(PetscMalloc1(am + 1, &ui));
2364   PetscCall(PetscMalloc1(am + 1, &udiag));
2365   ui[0] = 0;
2366 
2367   /* ICC(0) without matrix ordering: simply rearrange column indices */
2368   if (!levels && perm_identity) {
2369     for (i = 0; i < am; i++) {
2370       ncols     = ai[i + 1] - a->diag[i];
2371       ui[i + 1] = ui[i] + ncols;
2372       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2373     }
2374     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2375     cols = uj;
2376     for (i = 0; i < am; i++) {
2377       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2378       ncols = ai[i + 1] - a->diag[i] - 1;
2379       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2380       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2381     }
2382   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2383     PetscCall(ISGetIndices(iperm, &riip));
2384     PetscCall(ISGetIndices(perm, &rip));
2385 
2386     /* initialization */
2387     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2388 
2389     /* jl: linked list for storing indices of the pivot rows
2390        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2391     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2392     for (i = 0; i < am; i++) {
2393       jl[i] = am;
2394       il[i] = 0;
2395     }
2396 
2397     /* create and initialize a linked list for storing column indices of the active row k */
2398     nlnk = am + 1;
2399     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2400 
2401     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2402     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2403     current_space = free_space;
2404     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2405     current_space_lvl = free_space_lvl;
2406 
2407     for (k = 0; k < am; k++) { /* for each active row k */
2408       /* initialize lnk by the column indices of row rip[k] of A */
2409       nzk   = 0;
2410       ncols = ai[rip[k] + 1] - ai[rip[k]];
2411       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2412       ncols_upper = 0;
2413       for (j = 0; j < ncols; j++) {
2414         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2415         if (riip[i] >= k) {         /* only take upper triangular entry */
2416           ajtmp[ncols_upper] = i;
2417           ncols_upper++;
2418         }
2419       }
2420       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2421       nzk += nlnk;
2422 
2423       /* update lnk by computing fill-in for each pivot row to be merged in */
2424       prow = jl[k]; /* 1st pivot row */
2425 
2426       while (prow < k) {
2427         nextprow = jl[prow];
2428 
2429         /* merge prow into k-th row */
2430         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2431         jmax  = ui[prow + 1];
2432         ncols = jmax - jmin;
2433         i     = jmin - ui[prow];
2434         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2435         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2436         j     = *(uj - 1);
2437         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2438         nzk += nlnk;
2439 
2440         /* update il and jl for prow */
2441         if (jmin < jmax) {
2442           il[prow] = jmin;
2443           j        = *cols;
2444           jl[prow] = jl[j];
2445           jl[j]    = prow;
2446         }
2447         prow = nextprow;
2448       }
2449 
2450       /* if free space is not available, make more free space */
2451       if (current_space->local_remaining < nzk) {
2452         i = am - k + 1;                                    /* num of unfactored rows */
2453         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2454         PetscCall(PetscFreeSpaceGet(i, &current_space));
2455         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2456         reallocs++;
2457       }
2458 
2459       /* copy data into free_space and free_space_lvl, then initialize lnk */
2460       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2461       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2462 
2463       /* add the k-th row into il and jl */
2464       if (nzk > 1) {
2465         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2466         jl[k] = jl[i];
2467         jl[i] = k;
2468         il[k] = ui[k] + 1;
2469       }
2470       uj_ptr[k]     = current_space->array;
2471       uj_lvl_ptr[k] = current_space_lvl->array;
2472 
2473       current_space->array += nzk;
2474       current_space->local_used += nzk;
2475       current_space->local_remaining -= nzk;
2476 
2477       current_space_lvl->array += nzk;
2478       current_space_lvl->local_used += nzk;
2479       current_space_lvl->local_remaining -= nzk;
2480 
2481       ui[k + 1] = ui[k] + nzk;
2482     }
2483 
2484     PetscCall(ISRestoreIndices(perm, &rip));
2485     PetscCall(ISRestoreIndices(iperm, &riip));
2486     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2487     PetscCall(PetscFree(ajtmp));
2488 
2489     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2490     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2491     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2492     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2493     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2494 
2495   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2496 
2497   /* put together the new matrix in MATSEQSBAIJ format */
2498   b               = (Mat_SeqSBAIJ *)(fact)->data;
2499   b->singlemalloc = PETSC_FALSE;
2500 
2501   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2502 
2503   b->j         = uj;
2504   b->i         = ui;
2505   b->diag      = udiag;
2506   b->free_diag = PETSC_TRUE;
2507   b->ilen      = NULL;
2508   b->imax      = NULL;
2509   b->row       = perm;
2510   b->col       = perm;
2511   PetscCall(PetscObjectReference((PetscObject)perm));
2512   PetscCall(PetscObjectReference((PetscObject)perm));
2513   b->icol          = iperm;
2514   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2515 
2516   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2517 
2518   b->maxnz = b->nz = ui[am];
2519   b->free_a        = PETSC_TRUE;
2520   b->free_ij       = PETSC_TRUE;
2521 
2522   fact->info.factor_mallocs   = reallocs;
2523   fact->info.fill_ratio_given = fill;
2524   if (ai[am] != 0) {
2525     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2526     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2527   } else {
2528     fact->info.fill_ratio_needed = 0.0;
2529   }
2530 #if defined(PETSC_USE_INFO)
2531   if (ai[am] != 0) {
2532     PetscReal af = fact->info.fill_ratio_needed;
2533     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2534     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2535     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2536   } else {
2537     PetscCall(PetscInfo(A, "Empty matrix\n"));
2538   }
2539 #endif
2540   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2541   PetscFunctionReturn(PETSC_SUCCESS);
2542 }
2543 
2544 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2545 {
2546   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2547   Mat_SeqSBAIJ      *b;
2548   PetscBool          perm_identity, missing;
2549   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2550   const PetscInt    *rip, *riip;
2551   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2552   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2553   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2554   PetscReal          fill = info->fill, levels = info->levels;
2555   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2556   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2557   PetscBT            lnkbt;
2558   IS                 iperm;
2559 
2560   PetscFunctionBegin;
2561   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2562   PetscCall(MatMissingDiagonal(A, &missing, &d));
2563   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2564   PetscCall(ISIdentity(perm, &perm_identity));
2565   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2566 
2567   PetscCall(PetscMalloc1(am + 1, &ui));
2568   PetscCall(PetscMalloc1(am + 1, &udiag));
2569   ui[0] = 0;
2570 
2571   /* ICC(0) without matrix ordering: simply copies fill pattern */
2572   if (!levels && perm_identity) {
2573     for (i = 0; i < am; i++) {
2574       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2575       udiag[i]  = ui[i];
2576     }
2577     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2578     cols = uj;
2579     for (i = 0; i < am; i++) {
2580       aj    = a->j + a->diag[i];
2581       ncols = ui[i + 1] - ui[i];
2582       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2583     }
2584   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2585     PetscCall(ISGetIndices(iperm, &riip));
2586     PetscCall(ISGetIndices(perm, &rip));
2587 
2588     /* initialization */
2589     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2590 
2591     /* jl: linked list for storing indices of the pivot rows
2592        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2593     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2594     for (i = 0; i < am; i++) {
2595       jl[i] = am;
2596       il[i] = 0;
2597     }
2598 
2599     /* create and initialize a linked list for storing column indices of the active row k */
2600     nlnk = am + 1;
2601     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2602 
2603     /* initial FreeSpace size is fill*(ai[am]+1) */
2604     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2605     current_space = free_space;
2606     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2607     current_space_lvl = free_space_lvl;
2608 
2609     for (k = 0; k < am; k++) { /* for each active row k */
2610       /* initialize lnk by the column indices of row rip[k] of A */
2611       nzk   = 0;
2612       ncols = ai[rip[k] + 1] - ai[rip[k]];
2613       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2614       ncols_upper = 0;
2615       for (j = 0; j < ncols; j++) {
2616         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2617         if (riip[i] >= k) {         /* only take upper triangular entry */
2618           ajtmp[ncols_upper] = i;
2619           ncols_upper++;
2620         }
2621       }
2622       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2623       nzk += nlnk;
2624 
2625       /* update lnk by computing fill-in for each pivot row to be merged in */
2626       prow = jl[k]; /* 1st pivot row */
2627 
2628       while (prow < k) {
2629         nextprow = jl[prow];
2630 
2631         /* merge prow into k-th row */
2632         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2633         jmax  = ui[prow + 1];
2634         ncols = jmax - jmin;
2635         i     = jmin - ui[prow];
2636         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2637         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2638         j     = *(uj - 1);
2639         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2640         nzk += nlnk;
2641 
2642         /* update il and jl for prow */
2643         if (jmin < jmax) {
2644           il[prow] = jmin;
2645           j        = *cols;
2646           jl[prow] = jl[j];
2647           jl[j]    = prow;
2648         }
2649         prow = nextprow;
2650       }
2651 
2652       /* if free space is not available, make more free space */
2653       if (current_space->local_remaining < nzk) {
2654         i = am - k + 1;                                    /* num of unfactored rows */
2655         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2656         PetscCall(PetscFreeSpaceGet(i, &current_space));
2657         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2658         reallocs++;
2659       }
2660 
2661       /* copy data into free_space and free_space_lvl, then initialize lnk */
2662       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2663       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2664 
2665       /* add the k-th row into il and jl */
2666       if (nzk > 1) {
2667         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2668         jl[k] = jl[i];
2669         jl[i] = k;
2670         il[k] = ui[k] + 1;
2671       }
2672       uj_ptr[k]     = current_space->array;
2673       uj_lvl_ptr[k] = current_space_lvl->array;
2674 
2675       current_space->array += nzk;
2676       current_space->local_used += nzk;
2677       current_space->local_remaining -= nzk;
2678 
2679       current_space_lvl->array += nzk;
2680       current_space_lvl->local_used += nzk;
2681       current_space_lvl->local_remaining -= nzk;
2682 
2683       ui[k + 1] = ui[k] + nzk;
2684     }
2685 
2686 #if defined(PETSC_USE_INFO)
2687     if (ai[am] != 0) {
2688       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2689       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2690       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2691       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2692     } else {
2693       PetscCall(PetscInfo(A, "Empty matrix\n"));
2694     }
2695 #endif
2696 
2697     PetscCall(ISRestoreIndices(perm, &rip));
2698     PetscCall(ISRestoreIndices(iperm, &riip));
2699     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2700     PetscCall(PetscFree(ajtmp));
2701 
2702     /* destroy list of free space and other temporary array(s) */
2703     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2704     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2705     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2706     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2707 
2708   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2709 
2710   /* put together the new matrix in MATSEQSBAIJ format */
2711 
2712   b               = (Mat_SeqSBAIJ *)fact->data;
2713   b->singlemalloc = PETSC_FALSE;
2714 
2715   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2716 
2717   b->j         = uj;
2718   b->i         = ui;
2719   b->diag      = udiag;
2720   b->free_diag = PETSC_TRUE;
2721   b->ilen      = NULL;
2722   b->imax      = NULL;
2723   b->row       = perm;
2724   b->col       = perm;
2725 
2726   PetscCall(PetscObjectReference((PetscObject)perm));
2727   PetscCall(PetscObjectReference((PetscObject)perm));
2728 
2729   b->icol          = iperm;
2730   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2731   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2732   b->maxnz = b->nz = ui[am];
2733   b->free_a        = PETSC_TRUE;
2734   b->free_ij       = PETSC_TRUE;
2735 
2736   fact->info.factor_mallocs   = reallocs;
2737   fact->info.fill_ratio_given = fill;
2738   if (ai[am] != 0) {
2739     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2740   } else {
2741     fact->info.fill_ratio_needed = 0.0;
2742   }
2743   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2744   PetscFunctionReturn(PETSC_SUCCESS);
2745 }
2746 
2747 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2748 {
2749   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2750   Mat_SeqSBAIJ      *b;
2751   PetscBool          perm_identity, missing;
2752   PetscReal          fill = info->fill;
2753   const PetscInt    *rip, *riip;
2754   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2755   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2756   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2757   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2758   PetscBT            lnkbt;
2759   IS                 iperm;
2760 
2761   PetscFunctionBegin;
2762   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2763   PetscCall(MatMissingDiagonal(A, &missing, &i));
2764   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2765 
2766   /* check whether perm is the identity mapping */
2767   PetscCall(ISIdentity(perm, &perm_identity));
2768   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2769   PetscCall(ISGetIndices(iperm, &riip));
2770   PetscCall(ISGetIndices(perm, &rip));
2771 
2772   /* initialization */
2773   PetscCall(PetscMalloc1(am + 1, &ui));
2774   PetscCall(PetscMalloc1(am + 1, &udiag));
2775   ui[0] = 0;
2776 
2777   /* jl: linked list for storing indices of the pivot rows
2778      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2779   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2780   for (i = 0; i < am; i++) {
2781     jl[i] = am;
2782     il[i] = 0;
2783   }
2784 
2785   /* create and initialize a linked list for storing column indices of the active row k */
2786   nlnk = am + 1;
2787   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2788 
2789   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2790   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2791   current_space = free_space;
2792 
2793   for (k = 0; k < am; k++) { /* for each active row k */
2794     /* initialize lnk by the column indices of row rip[k] of A */
2795     nzk   = 0;
2796     ncols = ai[rip[k] + 1] - ai[rip[k]];
2797     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2798     ncols_upper = 0;
2799     for (j = 0; j < ncols; j++) {
2800       i = riip[*(aj + ai[rip[k]] + j)];
2801       if (i >= k) { /* only take upper triangular entry */
2802         cols[ncols_upper] = i;
2803         ncols_upper++;
2804       }
2805     }
2806     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2807     nzk += nlnk;
2808 
2809     /* update lnk by computing fill-in for each pivot row to be merged in */
2810     prow = jl[k]; /* 1st pivot row */
2811 
2812     while (prow < k) {
2813       nextprow = jl[prow];
2814       /* merge prow into k-th row */
2815       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2816       jmax   = ui[prow + 1];
2817       ncols  = jmax - jmin;
2818       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2819       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2820       nzk += nlnk;
2821 
2822       /* update il and jl for prow */
2823       if (jmin < jmax) {
2824         il[prow] = jmin;
2825         j        = *uj_ptr;
2826         jl[prow] = jl[j];
2827         jl[j]    = prow;
2828       }
2829       prow = nextprow;
2830     }
2831 
2832     /* if free space is not available, make more free space */
2833     if (current_space->local_remaining < nzk) {
2834       i = am - k + 1;                                    /* num of unfactored rows */
2835       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2836       PetscCall(PetscFreeSpaceGet(i, &current_space));
2837       reallocs++;
2838     }
2839 
2840     /* copy data into free space, then initialize lnk */
2841     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2842 
2843     /* add the k-th row into il and jl */
2844     if (nzk > 1) {
2845       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2846       jl[k] = jl[i];
2847       jl[i] = k;
2848       il[k] = ui[k] + 1;
2849     }
2850     ui_ptr[k] = current_space->array;
2851 
2852     current_space->array += nzk;
2853     current_space->local_used += nzk;
2854     current_space->local_remaining -= nzk;
2855 
2856     ui[k + 1] = ui[k] + nzk;
2857   }
2858 
2859   PetscCall(ISRestoreIndices(perm, &rip));
2860   PetscCall(ISRestoreIndices(iperm, &riip));
2861   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2862 
2863   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2864   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2865   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2866   PetscCall(PetscLLDestroy(lnk, lnkbt));
2867 
2868   /* put together the new matrix in MATSEQSBAIJ format */
2869 
2870   b               = (Mat_SeqSBAIJ *)fact->data;
2871   b->singlemalloc = PETSC_FALSE;
2872   b->free_a       = PETSC_TRUE;
2873   b->free_ij      = PETSC_TRUE;
2874 
2875   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2876 
2877   b->j         = uj;
2878   b->i         = ui;
2879   b->diag      = udiag;
2880   b->free_diag = PETSC_TRUE;
2881   b->ilen      = NULL;
2882   b->imax      = NULL;
2883   b->row       = perm;
2884   b->col       = perm;
2885 
2886   PetscCall(PetscObjectReference((PetscObject)perm));
2887   PetscCall(PetscObjectReference((PetscObject)perm));
2888 
2889   b->icol          = iperm;
2890   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2891 
2892   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2893 
2894   b->maxnz = b->nz = ui[am];
2895 
2896   fact->info.factor_mallocs   = reallocs;
2897   fact->info.fill_ratio_given = fill;
2898   if (ai[am] != 0) {
2899     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2900     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2901   } else {
2902     fact->info.fill_ratio_needed = 0.0;
2903   }
2904 #if defined(PETSC_USE_INFO)
2905   if (ai[am] != 0) {
2906     PetscReal af = fact->info.fill_ratio_needed;
2907     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2908     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2909     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2910   } else {
2911     PetscCall(PetscInfo(A, "Empty matrix\n"));
2912   }
2913 #endif
2914   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2915   PetscFunctionReturn(PETSC_SUCCESS);
2916 }
2917 
2918 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2919 {
2920   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2921   Mat_SeqSBAIJ      *b;
2922   PetscBool          perm_identity, missing;
2923   PetscReal          fill = info->fill;
2924   const PetscInt    *rip, *riip;
2925   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2926   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2927   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2928   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2929   PetscBT            lnkbt;
2930   IS                 iperm;
2931 
2932   PetscFunctionBegin;
2933   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2934   PetscCall(MatMissingDiagonal(A, &missing, &i));
2935   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2936 
2937   /* check whether perm is the identity mapping */
2938   PetscCall(ISIdentity(perm, &perm_identity));
2939   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2940   PetscCall(ISGetIndices(iperm, &riip));
2941   PetscCall(ISGetIndices(perm, &rip));
2942 
2943   /* initialization */
2944   PetscCall(PetscMalloc1(am + 1, &ui));
2945   ui[0] = 0;
2946 
2947   /* jl: linked list for storing indices of the pivot rows
2948      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2949   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2950   for (i = 0; i < am; i++) {
2951     jl[i] = am;
2952     il[i] = 0;
2953   }
2954 
2955   /* create and initialize a linked list for storing column indices of the active row k */
2956   nlnk = am + 1;
2957   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2958 
2959   /* initial FreeSpace size is fill*(ai[am]+1) */
2960   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2961   current_space = free_space;
2962 
2963   for (k = 0; k < am; k++) { /* for each active row k */
2964     /* initialize lnk by the column indices of row rip[k] of A */
2965     nzk   = 0;
2966     ncols = ai[rip[k] + 1] - ai[rip[k]];
2967     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2968     ncols_upper = 0;
2969     for (j = 0; j < ncols; j++) {
2970       i = riip[*(aj + ai[rip[k]] + j)];
2971       if (i >= k) { /* only take upper triangular entry */
2972         cols[ncols_upper] = i;
2973         ncols_upper++;
2974       }
2975     }
2976     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2977     nzk += nlnk;
2978 
2979     /* update lnk by computing fill-in for each pivot row to be merged in */
2980     prow = jl[k]; /* 1st pivot row */
2981 
2982     while (prow < k) {
2983       nextprow = jl[prow];
2984       /* merge prow into k-th row */
2985       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2986       jmax   = ui[prow + 1];
2987       ncols  = jmax - jmin;
2988       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2989       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2990       nzk += nlnk;
2991 
2992       /* update il and jl for prow */
2993       if (jmin < jmax) {
2994         il[prow] = jmin;
2995         j        = *uj_ptr;
2996         jl[prow] = jl[j];
2997         jl[j]    = prow;
2998       }
2999       prow = nextprow;
3000     }
3001 
3002     /* if free space is not available, make more free space */
3003     if (current_space->local_remaining < nzk) {
3004       i = am - k + 1;                     /* num of unfactored rows */
3005       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3006       PetscCall(PetscFreeSpaceGet(i, &current_space));
3007       reallocs++;
3008     }
3009 
3010     /* copy data into free space, then initialize lnk */
3011     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3012 
3013     /* add the k-th row into il and jl */
3014     if (nzk - 1 > 0) {
3015       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3016       jl[k] = jl[i];
3017       jl[i] = k;
3018       il[k] = ui[k] + 1;
3019     }
3020     ui_ptr[k] = current_space->array;
3021 
3022     current_space->array += nzk;
3023     current_space->local_used += nzk;
3024     current_space->local_remaining -= nzk;
3025 
3026     ui[k + 1] = ui[k] + nzk;
3027   }
3028 
3029 #if defined(PETSC_USE_INFO)
3030   if (ai[am] != 0) {
3031     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3032     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3033     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3034     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3035   } else {
3036     PetscCall(PetscInfo(A, "Empty matrix\n"));
3037   }
3038 #endif
3039 
3040   PetscCall(ISRestoreIndices(perm, &rip));
3041   PetscCall(ISRestoreIndices(iperm, &riip));
3042   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3043 
3044   /* destroy list of free space and other temporary array(s) */
3045   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3046   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3047   PetscCall(PetscLLDestroy(lnk, lnkbt));
3048 
3049   /* put together the new matrix in MATSEQSBAIJ format */
3050 
3051   b               = (Mat_SeqSBAIJ *)fact->data;
3052   b->singlemalloc = PETSC_FALSE;
3053   b->free_a       = PETSC_TRUE;
3054   b->free_ij      = PETSC_TRUE;
3055 
3056   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3057 
3058   b->j    = uj;
3059   b->i    = ui;
3060   b->diag = NULL;
3061   b->ilen = NULL;
3062   b->imax = NULL;
3063   b->row  = perm;
3064   b->col  = perm;
3065 
3066   PetscCall(PetscObjectReference((PetscObject)perm));
3067   PetscCall(PetscObjectReference((PetscObject)perm));
3068 
3069   b->icol          = iperm;
3070   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3071 
3072   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3073   b->maxnz = b->nz = ui[am];
3074 
3075   fact->info.factor_mallocs   = reallocs;
3076   fact->info.fill_ratio_given = fill;
3077   if (ai[am] != 0) {
3078     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3079   } else {
3080     fact->info.fill_ratio_needed = 0.0;
3081   }
3082   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3083   PetscFunctionReturn(PETSC_SUCCESS);
3084 }
3085 
3086 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3087 {
3088   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3089   PetscInt           n  = A->rmap->n;
3090   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3091   PetscScalar       *x, sum;
3092   const PetscScalar *b;
3093   const MatScalar   *aa = a->a, *v;
3094   PetscInt           i, nz;
3095 
3096   PetscFunctionBegin;
3097   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3098 
3099   PetscCall(VecGetArrayRead(bb, &b));
3100   PetscCall(VecGetArrayWrite(xx, &x));
3101 
3102   /* forward solve the lower triangular */
3103   x[0] = b[0];
3104   v    = aa;
3105   vi   = aj;
3106   for (i = 1; i < n; i++) {
3107     nz  = ai[i + 1] - ai[i];
3108     sum = b[i];
3109     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3110     v += nz;
3111     vi += nz;
3112     x[i] = sum;
3113   }
3114 
3115   /* backward solve the upper triangular */
3116   for (i = n - 1; i >= 0; i--) {
3117     v   = aa + adiag[i + 1] + 1;
3118     vi  = aj + adiag[i + 1] + 1;
3119     nz  = adiag[i] - adiag[i + 1] - 1;
3120     sum = x[i];
3121     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3122     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3123   }
3124 
3125   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3126   PetscCall(VecRestoreArrayRead(bb, &b));
3127   PetscCall(VecRestoreArrayWrite(xx, &x));
3128   PetscFunctionReturn(PETSC_SUCCESS);
3129 }
3130 
3131 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3132 {
3133   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3134   IS                 iscol = a->col, isrow = a->row;
3135   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3136   const PetscInt    *rout, *cout, *r, *c;
3137   PetscScalar       *x, *tmp, sum;
3138   const PetscScalar *b;
3139   const MatScalar   *aa = a->a, *v;
3140 
3141   PetscFunctionBegin;
3142   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3143 
3144   PetscCall(VecGetArrayRead(bb, &b));
3145   PetscCall(VecGetArrayWrite(xx, &x));
3146   tmp = a->solve_work;
3147 
3148   PetscCall(ISGetIndices(isrow, &rout));
3149   r = rout;
3150   PetscCall(ISGetIndices(iscol, &cout));
3151   c = cout;
3152 
3153   /* forward solve the lower triangular */
3154   tmp[0] = b[r[0]];
3155   v      = aa;
3156   vi     = aj;
3157   for (i = 1; i < n; i++) {
3158     nz  = ai[i + 1] - ai[i];
3159     sum = b[r[i]];
3160     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3161     tmp[i] = sum;
3162     v += nz;
3163     vi += nz;
3164   }
3165 
3166   /* backward solve the upper triangular */
3167   for (i = n - 1; i >= 0; i--) {
3168     v   = aa + adiag[i + 1] + 1;
3169     vi  = aj + adiag[i + 1] + 1;
3170     nz  = adiag[i] - adiag[i + 1] - 1;
3171     sum = tmp[i];
3172     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3173     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3174   }
3175 
3176   PetscCall(ISRestoreIndices(isrow, &rout));
3177   PetscCall(ISRestoreIndices(iscol, &cout));
3178   PetscCall(VecRestoreArrayRead(bb, &b));
3179   PetscCall(VecRestoreArrayWrite(xx, &x));
3180   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3181   PetscFunctionReturn(PETSC_SUCCESS);
3182 }
3183 
3184 /*
3185     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3186 */
3187 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3188 {
3189   Mat             B = *fact;
3190   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3191   IS              isicol;
3192   const PetscInt *r, *ic;
3193   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3194   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3195   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3196   PetscInt        nlnk, *lnk;
3197   PetscBT         lnkbt;
3198   PetscBool       row_identity, icol_identity;
3199   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3200   const PetscInt *ics;
3201   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3202   PetscReal       dt = info->dt, shift = info->shiftamount;
3203   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3204   PetscBool       missing;
3205 
3206   PetscFunctionBegin;
3207   if (dt == PETSC_DEFAULT) dt = 0.005;
3208   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3209 
3210   /* ------- symbolic factorization, can be reused ---------*/
3211   PetscCall(MatMissingDiagonal(A, &missing, &i));
3212   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3213   adiag = a->diag;
3214 
3215   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3216 
3217   /* bdiag is location of diagonal in factor */
3218   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3219   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3220 
3221   /* allocate row pointers bi */
3222   PetscCall(PetscMalloc1(2 * n + 2, &bi));
3223 
3224   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3225   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3226   nnz_max = ai[n] + 2 * n * dtcount + 2;
3227 
3228   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3229   PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3230 
3231   /* put together the new matrix */
3232   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3233   b = (Mat_SeqAIJ *)B->data;
3234 
3235   b->free_a       = PETSC_TRUE;
3236   b->free_ij      = PETSC_TRUE;
3237   b->singlemalloc = PETSC_FALSE;
3238 
3239   b->a    = ba;
3240   b->j    = bj;
3241   b->i    = bi;
3242   b->diag = bdiag;
3243   b->ilen = NULL;
3244   b->imax = NULL;
3245   b->row  = isrow;
3246   b->col  = iscol;
3247   PetscCall(PetscObjectReference((PetscObject)isrow));
3248   PetscCall(PetscObjectReference((PetscObject)iscol));
3249   b->icol = isicol;
3250 
3251   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3252   b->maxnz = nnz_max;
3253 
3254   B->factortype            = MAT_FACTOR_ILUDT;
3255   B->info.factor_mallocs   = 0;
3256   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3257   /* ------- end of symbolic factorization ---------*/
3258 
3259   PetscCall(ISGetIndices(isrow, &r));
3260   PetscCall(ISGetIndices(isicol, &ic));
3261   ics = ic;
3262 
3263   /* linked list for storing column indices of the active row */
3264   nlnk = n + 1;
3265   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3266 
3267   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3268   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3269   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3270   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3271   PetscCall(PetscArrayzero(rtmp, n));
3272 
3273   bi[0]         = 0;
3274   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3275   bdiag_rev[n]  = bdiag[0];
3276   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3277   for (i = 0; i < n; i++) {
3278     /* copy initial fill into linked list */
3279     nzi = ai[r[i] + 1] - ai[r[i]];
3280     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3281     nzi_al = adiag[r[i]] - ai[r[i]];
3282     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3283     ajtmp  = aj + ai[r[i]];
3284     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3285 
3286     /* load in initial (unfactored row) */
3287     aatmp = a->a + ai[r[i]];
3288     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3289 
3290     /* add pivot rows into linked list */
3291     row = lnk[n];
3292     while (row < i) {
3293       nzi_bl = bi[row + 1] - bi[row] + 1;
3294       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3295       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3296       nzi += nlnk;
3297       row = lnk[row];
3298     }
3299 
3300     /* copy data from lnk into jtmp, then initialize lnk */
3301     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3302 
3303     /* numerical factorization */
3304     bjtmp = jtmp;
3305     row   = *bjtmp++; /* 1st pivot row */
3306     while (row < i) {
3307       pc         = rtmp + row;
3308       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3309       multiplier = (*pc) * (*pv);
3310       *pc        = multiplier;
3311       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3312         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3313         pv = ba + bdiag[row + 1] + 1;
3314         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3315         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3316         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3317       }
3318       row = *bjtmp++;
3319     }
3320 
3321     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3322     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3323     nzi_bl   = 0;
3324     j        = 0;
3325     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3326       vtmp[j]       = rtmp[jtmp[j]];
3327       rtmp[jtmp[j]] = 0.0;
3328       nzi_bl++;
3329       j++;
3330     }
3331     nzi_bu = nzi - nzi_bl - 1;
3332     while (j < nzi) {
3333       vtmp[j]       = rtmp[jtmp[j]];
3334       rtmp[jtmp[j]] = 0.0;
3335       j++;
3336     }
3337 
3338     bjtmp = bj + bi[i];
3339     batmp = ba + bi[i];
3340     /* apply level dropping rule to L part */
3341     ncut = nzi_al + dtcount;
3342     if (ncut < nzi_bl) {
3343       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3344       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3345     } else {
3346       ncut = nzi_bl;
3347     }
3348     for (j = 0; j < ncut; j++) {
3349       bjtmp[j] = jtmp[j];
3350       batmp[j] = vtmp[j];
3351     }
3352     bi[i + 1] = bi[i] + ncut;
3353     nzi       = ncut + 1;
3354 
3355     /* apply level dropping rule to U part */
3356     ncut = nzi_au + dtcount;
3357     if (ncut < nzi_bu) {
3358       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3359       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3360     } else {
3361       ncut = nzi_bu;
3362     }
3363     nzi += ncut;
3364 
3365     /* mark bdiagonal */
3366     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3367     bdiag_rev[n - i - 1] = bdiag[i + 1];
3368     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3369     bjtmp                = bj + bdiag[i];
3370     batmp                = ba + bdiag[i];
3371     *bjtmp               = i;
3372     *batmp               = diag_tmp; /* rtmp[i]; */
3373     if (*batmp == 0.0) *batmp = dt + shift;
3374     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3375 
3376     bjtmp = bj + bdiag[i + 1] + 1;
3377     batmp = ba + bdiag[i + 1] + 1;
3378     for (k = 0; k < ncut; k++) {
3379       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3380       batmp[k] = vtmp[nzi_bl + 1 + k];
3381     }
3382 
3383     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3384   }              /* for (i=0; i<n; i++) */
3385   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3386 
3387   PetscCall(ISRestoreIndices(isrow, &r));
3388   PetscCall(ISRestoreIndices(isicol, &ic));
3389 
3390   PetscCall(PetscLLDestroy(lnk, lnkbt));
3391   PetscCall(PetscFree2(im, jtmp));
3392   PetscCall(PetscFree2(rtmp, vtmp));
3393   PetscCall(PetscFree(bdiag_rev));
3394 
3395   PetscCall(PetscLogFlops(B->cmap->n));
3396   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3397 
3398   PetscCall(ISIdentity(isrow, &row_identity));
3399   PetscCall(ISIdentity(isicol, &icol_identity));
3400   if (row_identity && icol_identity) {
3401     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3402   } else {
3403     B->ops->solve = MatSolve_SeqAIJ;
3404   }
3405 
3406   B->ops->solveadd          = NULL;
3407   B->ops->solvetranspose    = NULL;
3408   B->ops->solvetransposeadd = NULL;
3409   B->ops->matsolve          = NULL;
3410   B->assembled              = PETSC_TRUE;
3411   B->preallocated           = PETSC_TRUE;
3412   PetscFunctionReturn(PETSC_SUCCESS);
3413 }
3414 
3415 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3416 /*
3417     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3418 */
3419 
3420 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3421 {
3422   PetscFunctionBegin;
3423   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3424   PetscFunctionReturn(PETSC_SUCCESS);
3425 }
3426 
3427 /*
3428    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3429    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3430 */
3431 /*
3432     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3433 */
3434 
3435 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3436 {
3437   Mat             C = fact;
3438   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3439   IS              isrow = b->row, isicol = b->icol;
3440   const PetscInt *r, *ic, *ics;
3441   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3442   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3443   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3444   PetscReal       dt = info->dt, shift = info->shiftamount;
3445   PetscBool       row_identity, col_identity;
3446 
3447   PetscFunctionBegin;
3448   PetscCall(ISGetIndices(isrow, &r));
3449   PetscCall(ISGetIndices(isicol, &ic));
3450   PetscCall(PetscMalloc1(n + 1, &rtmp));
3451   ics = ic;
3452 
3453   for (i = 0; i < n; i++) {
3454     /* initialize rtmp array */
3455     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3456     bjtmp = bj + bi[i];
3457     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3458     rtmp[i] = 0.0;
3459     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3460     bjtmp   = bj + bdiag[i + 1] + 1;
3461     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3462 
3463     /* load in initial unfactored row of A */
3464     nz    = ai[r[i] + 1] - ai[r[i]];
3465     ajtmp = aj + ai[r[i]];
3466     v     = aa + ai[r[i]];
3467     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3468 
3469     /* numerical factorization */
3470     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3471     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3472     k     = 0;
3473     while (k < nzl) {
3474       row        = *bjtmp++;
3475       pc         = rtmp + row;
3476       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3477       multiplier = (*pc) * (*pv);
3478       *pc        = multiplier;
3479       if (PetscAbsScalar(multiplier) > dt) {
3480         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3481         pv = b->a + bdiag[row + 1] + 1;
3482         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3483         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3484         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3485       }
3486       k++;
3487     }
3488 
3489     /* finished row so stick it into b->a */
3490     /* L-part */
3491     pv  = b->a + bi[i];
3492     pj  = bj + bi[i];
3493     nzl = bi[i + 1] - bi[i];
3494     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3495 
3496     /* diagonal: invert diagonal entries for simpler triangular solves */
3497     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3498     b->a[bdiag[i]] = 1.0 / rtmp[i];
3499 
3500     /* U-part */
3501     pv  = b->a + bdiag[i + 1] + 1;
3502     pj  = bj + bdiag[i + 1] + 1;
3503     nzu = bdiag[i] - bdiag[i + 1] - 1;
3504     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3505   }
3506 
3507   PetscCall(PetscFree(rtmp));
3508   PetscCall(ISRestoreIndices(isicol, &ic));
3509   PetscCall(ISRestoreIndices(isrow, &r));
3510 
3511   PetscCall(ISIdentity(isrow, &row_identity));
3512   PetscCall(ISIdentity(isicol, &col_identity));
3513   if (row_identity && col_identity) {
3514     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3515   } else {
3516     C->ops->solve = MatSolve_SeqAIJ;
3517   }
3518   C->ops->solveadd          = NULL;
3519   C->ops->solvetranspose    = NULL;
3520   C->ops->solvetransposeadd = NULL;
3521   C->ops->matsolve          = NULL;
3522   C->assembled              = PETSC_TRUE;
3523   C->preallocated           = PETSC_TRUE;
3524 
3525   PetscCall(PetscLogFlops(C->cmap->n));
3526   PetscFunctionReturn(PETSC_SUCCESS);
3527 }
3528