xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 7e1a0bbe36d2be40a00a95404ece00db4857f70d)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
6 /*
7       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8 
9       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10 */
11 #if 0
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal          best, past = 0, future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i = 0; i < n; i++) {
25     future = 0.0;
26     for (j = ai[i]; j < ai[i + 1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else past = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past / future > best) {
32       best    = past / future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n, &done));
38   PetscCall(PetscArrayzero(done, n));
39   PetscCall(PetscMalloc1(n, &order));
40   order[0] = current;
41   for (i = 0; i < n - 1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j = ai[current]; j < ai[current + 1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k = ai[jj]; k < ai[jj + 1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past / future > best) {
57         best       = past / future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k = 0; k < n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j = ai[k]; j < ai[k + 1]; j++) {
68           kk = aj[j];
69           if (done[kk]) past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past / future > best) {
74           best       = past / future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80     current      = newcurrent;
81     order[i + 1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #endif
91 
92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93 {
94   PetscFunctionBegin;
95   *type = MATSOLVERPETSC;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100 {
101   PetscInt n = A->rmap->n;
102 
103   PetscFunctionBegin;
104 #if defined(PETSC_USE_COMPLEX)
105   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107     *B = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110 #endif
111 
112   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113   PetscCall(MatSetSizes(*B, n, n, n, n));
114   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115     PetscCall(MatSetType(*B, MATSEQAIJ));
116 
117     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
119 
120     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
127 
128     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133   (*B)->factortype = ftype;
134 
135   PetscCall(PetscFree((*B)->solvertype));
136   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137   (*B)->canuseordering = PETSC_TRUE;
138   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 #if 0
143 // currently unused
144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145 {
146   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147   IS                 isicol;
148   const PetscInt    *r, *ic;
149   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150   PetscInt          *bi, *bj, *ajtmp;
151   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152   PetscReal          f;
153   PetscInt           nlnk, *lnk, k, **bi_ptr;
154   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155   PetscBT            lnkbt;
156   PetscBool          missing;
157 
158   PetscFunctionBegin;
159   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160   PetscCall(MatMissingDiagonal(A, &missing, &i));
161   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
162 
163   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164   PetscCall(ISGetIndices(isrow, &r));
165   PetscCall(ISGetIndices(isicol, &ic));
166 
167   /* get new row pointers */
168   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
169   bi[0] = 0;
170 
171   /* bdiag is location of diagonal in factor */
172   PetscCall(PetscMalloc1(n + 1, &bdiag));
173   bdiag[0] = 0;
174 
175   /* linked list for storing column indices of the active row */
176   nlnk = n + 1;
177   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
179 
180   /* initial FreeSpace size is f*(ai[n]+1) */
181   f = info->fill;
182   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
183   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
184   current_space = free_space;
185 
186   for (i = 0; i < n; i++) {
187     /* copy previous fill into linked list */
188     nzi   = 0;
189     nnz   = ai[r[i] + 1] - ai[r[i]];
190     ajtmp = aj + ai[r[i]];
191     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
192     nzi += nlnk;
193 
194     /* add pivot rows into linked list */
195     row = lnk[n];
196     while (row < i) {
197       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
199       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
200       nzi += nlnk;
201       row = lnk[row];
202     }
203     bi[i + 1] = bi[i] + nzi;
204     im[i]     = nzi;
205 
206     /* mark bdiag */
207     nzbd = 0;
208     nnz  = nzi;
209     k    = lnk[n];
210     while (nnz-- && k < i) {
211       nzbd++;
212       k = lnk[k];
213     }
214     bdiag[i] = bi[i] + nzbd;
215 
216     /* if free space is not available, make more free space */
217     if (current_space->local_remaining < nzi) {
218       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
219       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
220       reallocs++;
221     }
222 
223     /* copy data into free space, then initialize lnk */
224     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
225 
226     bi_ptr[i] = current_space->array;
227     current_space->array += nzi;
228     current_space->local_used += nzi;
229     current_space->local_remaining -= nzi;
230   }
231   #if defined(PETSC_USE_INFO)
232   if (ai[n] != 0) {
233     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
234     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
235     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
236     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
237     PetscCall(PetscInfo(A, "for best performance.\n"));
238   } else {
239     PetscCall(PetscInfo(A, "Empty matrix\n"));
240   }
241   #endif
242 
243   PetscCall(ISRestoreIndices(isrow, &r));
244   PetscCall(ISRestoreIndices(isicol, &ic));
245 
246   /* destroy list of free space and other temporary array(s) */
247   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
248   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
249   PetscCall(PetscLLDestroy(lnk, lnkbt));
250   PetscCall(PetscFree2(bi_ptr, im));
251 
252   /* put together the new matrix */
253   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
254   b = (Mat_SeqAIJ *)B->data;
255   b->free_ij      = PETSC_TRUE;
256   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
257   b->free_a       = PETSC_TRUE;
258   b->j    = bj;
259   b->i    = bi;
260   b->diag = bdiag;
261   b->ilen = NULL;
262   b->imax = NULL;
263   b->row  = isrow;
264   b->col  = iscol;
265   PetscCall(PetscObjectReference((PetscObject)isrow));
266   PetscCall(PetscObjectReference((PetscObject)iscol));
267   b->icol = isicol;
268   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
269 
270   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271   b->maxnz = b->nz = bi[n];
272 
273   B->factortype            = MAT_FACTOR_LU;
274   B->info.factor_mallocs   = reallocs;
275   B->info.fill_ratio_given = f;
276 
277   if (ai[n]) {
278     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279   } else {
280     B->info.fill_ratio_needed = 0.0;
281   }
282   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 #endif
287 
288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289 {
290   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
291   IS                 isicol;
292   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293   PetscInt           i, n = A->rmap->n;
294   PetscInt          *bi, *bj;
295   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296   PetscReal          f;
297   PetscInt           nlnk, *lnk, k, **bi_ptr;
298   PetscFreeSpaceList free_space = NULL, current_space = NULL;
299   PetscBT            lnkbt;
300   PetscBool          missing;
301 
302   PetscFunctionBegin;
303   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304   PetscCall(MatMissingDiagonal(A, &missing, &i));
305   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
306 
307   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308   PetscCall(ISGetIndices(isrow, &r));
309   PetscCall(ISGetIndices(isicol, &ic));
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
313   PetscCall(PetscMalloc1(n + 1, &bdiag));
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
319 
320   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326   current_space = free_space;
327 
328   for (i = 0; i < n; i++) {
329     /* copy previous fill into linked list */
330     nzi   = 0;
331     nnz   = ai[r[i] + 1] - ai[r[i]];
332     ajtmp = aj + ai[r[i]];
333     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i) {
339       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342       nzi += nlnk;
343       row = lnk[row];
344     }
345     bi[i + 1] = bi[i] + nzi;
346     im[i]     = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i) {
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining < nzi) {
360       /* estimated additional space needed */
361       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
363       reallocs++;
364     }
365 
366     /* copy data into free space, then initialize lnk */
367     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
368 
369     bi_ptr[i] = current_space->array;
370     current_space->array += nzi;
371     current_space->local_used += nzi;
372     current_space->local_remaining -= nzi;
373   }
374 
375   PetscCall(ISRestoreIndices(isrow, &r));
376   PetscCall(ISRestoreIndices(isicol, &ic));
377 
378   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
380   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381   PetscCall(PetscLLDestroy(lnk, lnkbt));
382   PetscCall(PetscFree2(bi_ptr, im));
383 
384   /* put together the new matrix */
385   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386   b          = (Mat_SeqAIJ *)B->data;
387   b->free_ij = PETSC_TRUE;
388   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
389   b->free_a = PETSC_TRUE;
390   b->j      = bj;
391   b->i      = bi;
392   b->diag   = bdiag;
393   b->ilen   = NULL;
394   b->imax   = NULL;
395   b->row    = isrow;
396   b->col    = iscol;
397   PetscCall(PetscObjectReference((PetscObject)isrow));
398   PetscCall(PetscObjectReference((PetscObject)iscol));
399   b->icol = isicol;
400   PetscCall(PetscMalloc1(n, &b->solve_work));
401 
402   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403   b->maxnz = b->nz = bdiag[0] + 1;
404 
405   B->factortype            = MAT_FACTOR_LU;
406   B->info.factor_mallocs   = reallocs;
407   B->info.fill_ratio_given = f;
408 
409   if (ai[n]) {
410     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
411   } else {
412     B->info.fill_ratio_needed = 0.0;
413   }
414 #if defined(PETSC_USE_INFO)
415   if (ai[n] != 0) {
416     PetscReal af = B->info.fill_ratio_needed;
417     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
418     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
419     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
420     PetscCall(PetscInfo(A, "for best performance.\n"));
421   } else {
422     PetscCall(PetscInfo(A, "Empty matrix\n"));
423   }
424 #endif
425   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
428   PetscFunctionReturn(PETSC_SUCCESS);
429 }
430 
431 /*
432     Trouble in factorization, should we dump the original matrix?
433 */
434 PetscErrorCode MatFactorDumpMatrix(Mat A)
435 {
436   PetscBool flg = PETSC_FALSE;
437 
438   PetscFunctionBegin;
439   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
440   if (flg) {
441     PetscViewer viewer;
442     char        filename[PETSC_MAX_PATH_LEN];
443 
444     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
445     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
446     PetscCall(MatView(A, viewer));
447     PetscCall(PetscViewerDestroy(&viewer));
448   }
449   PetscFunctionReturn(PETSC_SUCCESS);
450 }
451 
452 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453 {
454   Mat              C = B;
455   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456   IS               isrow = b->row, isicol = b->icol;
457   const PetscInt  *r, *ic, *ics;
458   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459   PetscInt         i, j, k, nz, nzL, row, *pj;
460   const PetscInt  *ajtmp, *bjtmp;
461   MatScalar       *rtmp, *pc, multiplier, *pv;
462   const MatScalar *aa, *v;
463   MatScalar       *ba;
464   PetscBool        row_identity, col_identity;
465   FactorShiftCtx   sctx;
466   const PetscInt  *ddiag;
467   PetscReal        rs;
468   MatScalar        d;
469 
470   PetscFunctionBegin;
471   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
472   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
473   /* MatPivotSetUp(): initialize shift context sctx */
474   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
475 
476   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
477     ddiag          = a->diag;
478     sctx.shift_top = info->zeropivot;
479     for (i = 0; i < n; i++) {
480       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
481       d  = (aa)[ddiag[i]];
482       rs = -PetscAbsScalar(d) - PetscRealPart(d);
483       v  = aa + ai[i];
484       nz = ai[i + 1] - ai[i];
485       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
486       if (rs > sctx.shift_top) sctx.shift_top = rs;
487     }
488     sctx.shift_top *= 1.1;
489     sctx.nshift_max = 5;
490     sctx.shift_lo   = 0.;
491     sctx.shift_hi   = 1.;
492   }
493 
494   PetscCall(ISGetIndices(isrow, &r));
495   PetscCall(ISGetIndices(isicol, &ic));
496   PetscCall(PetscMalloc1(n + 1, &rtmp));
497   ics = ic;
498 
499   do {
500     sctx.newshift = PETSC_FALSE;
501     for (i = 0; i < n; i++) {
502       /* zero rtmp */
503       /* L part */
504       nz    = bi[i + 1] - bi[i];
505       bjtmp = bj + bi[i];
506       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
507 
508       /* U part */
509       nz    = bdiag[i] - bdiag[i + 1];
510       bjtmp = bj + bdiag[i + 1] + 1;
511       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
512 
513       /* load in initial (unfactored row) */
514       nz    = ai[r[i] + 1] - ai[r[i]];
515       ajtmp = aj + ai[r[i]];
516       v     = aa + ai[r[i]];
517       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
518       /* ZeropivotApply() */
519       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
520 
521       /* elimination */
522       bjtmp = bj + bi[i];
523       row   = *bjtmp++;
524       nzL   = bi[i + 1] - bi[i];
525       for (k = 0; k < nzL; k++) {
526         pc = rtmp + row;
527         if (*pc != 0.0) {
528           pv         = ba + bdiag[row];
529           multiplier = *pc * (*pv);
530           *pc        = multiplier;
531 
532           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
533           pv = ba + bdiag[row + 1] + 1;
534           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
535 
536           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
537           PetscCall(PetscLogFlops(1 + 2.0 * nz));
538         }
539         row = *bjtmp++;
540       }
541 
542       /* finished row so stick it into b->a */
543       rs = 0.0;
544       /* L part */
545       pv = ba + bi[i];
546       pj = b->j + bi[i];
547       nz = bi[i + 1] - bi[i];
548       for (j = 0; j < nz; j++) {
549         pv[j] = rtmp[pj[j]];
550         rs += PetscAbsScalar(pv[j]);
551       }
552 
553       /* U part */
554       pv = ba + bdiag[i + 1] + 1;
555       pj = b->j + bdiag[i + 1] + 1;
556       nz = bdiag[i] - bdiag[i + 1] - 1;
557       for (j = 0; j < nz; j++) {
558         pv[j] = rtmp[pj[j]];
559         rs += PetscAbsScalar(pv[j]);
560       }
561 
562       sctx.rs = rs;
563       sctx.pv = rtmp[i];
564       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
565       if (sctx.newshift) break; /* break for-loop */
566       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
567 
568       /* Mark diagonal and invert diagonal for simpler triangular solves */
569       pv  = ba + bdiag[i];
570       *pv = 1.0 / rtmp[i];
571 
572     } /* endof for (i=0; i<n; i++) { */
573 
574     /* MatPivotRefine() */
575     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
576       /*
577        * if no shift in this attempt & shifting & started shifting & can refine,
578        * then try lower shift
579        */
580       sctx.shift_hi       = sctx.shift_fraction;
581       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
582       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
583       sctx.newshift       = PETSC_TRUE;
584       sctx.nshift++;
585     }
586   } while (sctx.newshift);
587 
588   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
589   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
590 
591   PetscCall(PetscFree(rtmp));
592   PetscCall(ISRestoreIndices(isicol, &ic));
593   PetscCall(ISRestoreIndices(isrow, &r));
594 
595   PetscCall(ISIdentity(isrow, &row_identity));
596   PetscCall(ISIdentity(isicol, &col_identity));
597   if (b->inode.size_csr) {
598     C->ops->solve = MatSolve_SeqAIJ_Inode;
599   } else if (row_identity && col_identity) {
600     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
601   } else {
602     C->ops->solve = MatSolve_SeqAIJ;
603   }
604   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
605   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
606   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
607   C->ops->matsolve          = MatMatSolve_SeqAIJ;
608   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
609   C->assembled              = PETSC_TRUE;
610   C->preallocated           = PETSC_TRUE;
611 
612   PetscCall(PetscLogFlops(C->cmap->n));
613 
614   /* MatShiftView(A,info,&sctx) */
615   if (sctx.nshift) {
616     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
617       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
618     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
619       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
620     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
621       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
622     }
623   }
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
628 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
629 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
630 
631 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
632 {
633   Mat              C = B;
634   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
635   IS               isrow = b->row, isicol = b->icol;
636   const PetscInt  *r, *ic, *ics;
637   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
638   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
639   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
640   MatScalar       *pv, *rtmp, *pc, multiplier, d;
641   const MatScalar *v, *aa;
642   MatScalar       *ba;
643   PetscReal        rs = 0.0;
644   FactorShiftCtx   sctx;
645   const PetscInt  *ddiag;
646   PetscBool        row_identity, col_identity;
647 
648   PetscFunctionBegin;
649   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
650   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
651   /* MatPivotSetUp(): initialize shift context sctx */
652   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
653 
654   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
655     ddiag          = a->diag;
656     sctx.shift_top = info->zeropivot;
657     for (i = 0; i < n; i++) {
658       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
659       d  = (aa)[ddiag[i]];
660       rs = -PetscAbsScalar(d) - PetscRealPart(d);
661       v  = aa + ai[i];
662       nz = ai[i + 1] - ai[i];
663       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
664       if (rs > sctx.shift_top) sctx.shift_top = rs;
665     }
666     sctx.shift_top *= 1.1;
667     sctx.nshift_max = 5;
668     sctx.shift_lo   = 0.;
669     sctx.shift_hi   = 1.;
670   }
671 
672   PetscCall(ISGetIndices(isrow, &r));
673   PetscCall(ISGetIndices(isicol, &ic));
674   PetscCall(PetscMalloc1(n + 1, &rtmp));
675   ics = ic;
676 
677   do {
678     sctx.newshift = PETSC_FALSE;
679     for (i = 0; i < n; i++) {
680       nz    = bi[i + 1] - bi[i];
681       bjtmp = bj + bi[i];
682       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
683 
684       /* load in initial (unfactored row) */
685       nz    = ai[r[i] + 1] - ai[r[i]];
686       ajtmp = aj + ai[r[i]];
687       v     = aa + ai[r[i]];
688       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
689       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
690 
691       row = *bjtmp++;
692       while (row < i) {
693         pc = rtmp + row;
694         if (*pc != 0.0) {
695           pv         = ba + diag_offset[row];
696           pj         = b->j + diag_offset[row] + 1;
697           multiplier = *pc / *pv++;
698           *pc        = multiplier;
699           nz         = bi[row + 1] - diag_offset[row] - 1;
700           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701           PetscCall(PetscLogFlops(1 + 2.0 * nz));
702         }
703         row = *bjtmp++;
704       }
705       /* finished row so stick it into b->a */
706       pv   = ba + bi[i];
707       pj   = b->j + bi[i];
708       nz   = bi[i + 1] - bi[i];
709       diag = diag_offset[i] - bi[i];
710       rs   = 0.0;
711       for (j = 0; j < nz; j++) {
712         pv[j] = rtmp[pj[j]];
713         rs += PetscAbsScalar(pv[j]);
714       }
715       rs -= PetscAbsScalar(pv[diag]);
716 
717       sctx.rs = rs;
718       sctx.pv = pv[diag];
719       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
720       if (sctx.newshift) break;
721       pv[diag] = sctx.pv;
722     }
723 
724     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
725       /*
726        * if no shift in this attempt & shifting & started shifting & can refine,
727        * then try lower shift
728        */
729       sctx.shift_hi       = sctx.shift_fraction;
730       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
731       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
732       sctx.newshift       = PETSC_TRUE;
733       sctx.nshift++;
734     }
735   } while (sctx.newshift);
736 
737   /* invert diagonal entries for simpler triangular solves */
738   for (i = 0; i < n; i++) ba[diag_offset[i]] = 1.0 / ba[diag_offset[i]];
739   PetscCall(PetscFree(rtmp));
740   PetscCall(ISRestoreIndices(isicol, &ic));
741   PetscCall(ISRestoreIndices(isrow, &r));
742   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
743   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
744 
745   PetscCall(ISIdentity(isrow, &row_identity));
746   PetscCall(ISIdentity(isicol, &col_identity));
747   if (row_identity && col_identity) {
748     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749   } else {
750     C->ops->solve = MatSolve_SeqAIJ_inplace;
751   }
752   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
753   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
754   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
755   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
756   C->ops->matsolvetranspose = NULL;
757 
758   C->assembled    = PETSC_TRUE;
759   C->preallocated = PETSC_TRUE;
760 
761   PetscCall(PetscLogFlops(C->cmap->n));
762   if (sctx.nshift) {
763     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
764       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
765     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
766       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
767     }
768   }
769   C->ops->solve          = MatSolve_SeqAIJ_inplace;
770   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
771 
772   PetscCall(MatSeqAIJCheckInode(C));
773   PetscFunctionReturn(PETSC_SUCCESS);
774 }
775 
776 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
777 
778 /*
779    This routine implements inplace ILU(0) with row or/and column permutations.
780    Input:
781      A - original matrix
782    Output;
783      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
784          a->j (col index) is permuted by the inverse of colperm, then sorted
785          a->a reordered accordingly with a->j
786          a->diag (ptr to diagonal elements) is updated.
787 */
788 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
789 {
790   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
791   IS              isrow = a->row, isicol = a->icol;
792   const PetscInt *r, *ic, *ics;
793   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
794   PetscInt       *ajtmp, nz, row;
795   PetscInt       *diag = a->diag, nbdiag, *pj;
796   PetscScalar    *rtmp, *pc, multiplier, d;
797   MatScalar      *pv, *v;
798   PetscReal       rs;
799   FactorShiftCtx  sctx;
800   MatScalar      *aa, *vtmp;
801 
802   PetscFunctionBegin;
803   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
804 
805   PetscCall(MatSeqAIJGetArray(A, &aa));
806   /* MatPivotSetUp(): initialize shift context sctx */
807   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
808 
809   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
810     const PetscInt *ddiag = a->diag;
811     sctx.shift_top        = info->zeropivot;
812     for (i = 0; i < n; i++) {
813       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814       d    = (aa)[ddiag[i]];
815       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
816       vtmp = aa + ai[i];
817       nz   = ai[i + 1] - ai[i];
818       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
819       if (rs > sctx.shift_top) sctx.shift_top = rs;
820     }
821     sctx.shift_top *= 1.1;
822     sctx.nshift_max = 5;
823     sctx.shift_lo   = 0.;
824     sctx.shift_hi   = 1.;
825   }
826 
827   PetscCall(ISGetIndices(isrow, &r));
828   PetscCall(ISGetIndices(isicol, &ic));
829   PetscCall(PetscMalloc1(n + 1, &rtmp));
830   PetscCall(PetscArrayzero(rtmp, n + 1));
831   ics = ic;
832 
833 #if defined(MV)
834   sctx.shift_top      = 0.;
835   sctx.nshift_max     = 0;
836   sctx.shift_lo       = 0.;
837   sctx.shift_hi       = 0.;
838   sctx.shift_fraction = 0.;
839 
840   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
841     sctx.shift_top = 0.;
842     for (i = 0; i < n; i++) {
843       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
844       d  = aa[diag[i]];
845       rs = -PetscAbsScalar(d) - PetscRealPart(d);
846       v  = aa + ai[i];
847       nz = ai[i + 1] - ai[i];
848       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
849       if (rs > sctx.shift_top) sctx.shift_top = rs;
850     }
851     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
852     sctx.shift_top *= 1.1;
853     sctx.nshift_max = 5;
854     sctx.shift_lo   = 0.;
855     sctx.shift_hi   = 1.;
856   }
857 
858   sctx.shift_amount = 0.;
859   sctx.nshift       = 0;
860 #endif
861 
862   do {
863     sctx.newshift = PETSC_FALSE;
864     for (i = 0; i < n; i++) {
865       /* load in initial unfactored row */
866       nz    = ai[r[i] + 1] - ai[r[i]];
867       ajtmp = aj + ai[r[i]];
868       v     = aa + ai[r[i]];
869       /* sort permuted ajtmp and values v accordingly */
870       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
871       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
872 
873       diag[r[i]] = ai[r[i]];
874       for (j = 0; j < nz; j++) {
875         rtmp[ajtmp[j]] = v[j];
876         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
877       }
878       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
879 
880       row = *ajtmp++;
881       while (row < i) {
882         pc = rtmp + row;
883         if (*pc != 0.0) {
884           pv = aa + diag[r[row]];
885           pj = aj + diag[r[row]] + 1;
886 
887           multiplier = *pc / *pv++;
888           *pc        = multiplier;
889           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
890           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
891           PetscCall(PetscLogFlops(1 + 2.0 * nz));
892         }
893         row = *ajtmp++;
894       }
895       /* finished row so overwrite it onto aa */
896       pv     = aa + ai[r[i]];
897       pj     = aj + ai[r[i]];
898       nz     = ai[r[i] + 1] - ai[r[i]];
899       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
900 
901       rs = 0.0;
902       for (j = 0; j < nz; j++) {
903         pv[j] = rtmp[pj[j]];
904         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
905       }
906 
907       sctx.rs = rs;
908       sctx.pv = pv[nbdiag];
909       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
910       if (sctx.newshift) break;
911       pv[nbdiag] = sctx.pv;
912     }
913 
914     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
915       /*
916        * if no shift in this attempt & shifting & started shifting & can refine,
917        * then try lower shift
918        */
919       sctx.shift_hi       = sctx.shift_fraction;
920       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
921       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
922       sctx.newshift       = PETSC_TRUE;
923       sctx.nshift++;
924     }
925   } while (sctx.newshift);
926 
927   /* invert diagonal entries for simpler triangular solves */
928   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
929 
930   PetscCall(MatSeqAIJRestoreArray(A, &aa));
931   PetscCall(PetscFree(rtmp));
932   PetscCall(ISRestoreIndices(isicol, &ic));
933   PetscCall(ISRestoreIndices(isrow, &r));
934 
935   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
936   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
937   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
938   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
939 
940   A->assembled    = PETSC_TRUE;
941   A->preallocated = PETSC_TRUE;
942 
943   PetscCall(PetscLogFlops(A->cmap->n));
944   if (sctx.nshift) {
945     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
947     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
949     }
950   }
951   PetscFunctionReturn(PETSC_SUCCESS);
952 }
953 
954 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
955 {
956   Mat C;
957 
958   PetscFunctionBegin;
959   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
960   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
961   PetscCall(MatLUFactorNumeric(C, A, info));
962 
963   A->ops->solve          = C->ops->solve;
964   A->ops->solvetranspose = C->ops->solvetranspose;
965 
966   PetscCall(MatHeaderMerge(A, &C));
967   PetscFunctionReturn(PETSC_SUCCESS);
968 }
969 
970 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
971 {
972   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
973   IS                 iscol = a->col, isrow = a->row;
974   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
975   PetscInt           nz;
976   const PetscInt    *rout, *cout, *r, *c;
977   PetscScalar       *x, *tmp, *tmps, sum;
978   const PetscScalar *b;
979   const MatScalar   *aa, *v;
980 
981   PetscFunctionBegin;
982   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
983 
984   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
985   PetscCall(VecGetArrayRead(bb, &b));
986   PetscCall(VecGetArrayWrite(xx, &x));
987   tmp = a->solve_work;
988 
989   PetscCall(ISGetIndices(isrow, &rout));
990   r = rout;
991   PetscCall(ISGetIndices(iscol, &cout));
992   c = cout + (n - 1);
993 
994   /* forward solve the lower triangular */
995   tmp[0] = b[*r++];
996   tmps   = tmp;
997   for (i = 1; i < n; i++) {
998     v   = aa + ai[i];
999     vi  = aj + ai[i];
1000     nz  = a->diag[i] - ai[i];
1001     sum = b[*r++];
1002     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1003     tmp[i] = sum;
1004   }
1005 
1006   /* backward solve the upper triangular */
1007   for (i = n - 1; i >= 0; i--) {
1008     v   = aa + a->diag[i] + 1;
1009     vi  = aj + a->diag[i] + 1;
1010     nz  = ai[i + 1] - a->diag[i] - 1;
1011     sum = tmp[i];
1012     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1013     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1014   }
1015   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1016   PetscCall(ISRestoreIndices(isrow, &rout));
1017   PetscCall(ISRestoreIndices(iscol, &cout));
1018   PetscCall(VecRestoreArrayRead(bb, &b));
1019   PetscCall(VecRestoreArrayWrite(xx, &x));
1020   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1021   PetscFunctionReturn(PETSC_SUCCESS);
1022 }
1023 
1024 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1025 {
1026   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1027   IS                 iscol = a->col, isrow = a->row;
1028   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1029   PetscInt           nz, neq, ldb, ldx;
1030   const PetscInt    *rout, *cout, *r, *c;
1031   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1032   const PetscScalar *b, *aa, *v;
1033   PetscBool          isdense;
1034 
1035   PetscFunctionBegin;
1036   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1037   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1038   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1039   if (X != B) {
1040     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1041     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1042   }
1043   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1044   PetscCall(MatDenseGetArrayRead(B, &b));
1045   PetscCall(MatDenseGetLDA(B, &ldb));
1046   PetscCall(MatDenseGetArray(X, &x));
1047   PetscCall(MatDenseGetLDA(X, &ldx));
1048   PetscCall(ISGetIndices(isrow, &rout));
1049   r = rout;
1050   PetscCall(ISGetIndices(iscol, &cout));
1051   c = cout;
1052   for (neq = 0; neq < B->cmap->n; neq++) {
1053     /* forward solve the lower triangular */
1054     tmp[0] = b[r[0]];
1055     tmps   = tmp;
1056     for (i = 1; i < n; i++) {
1057       v   = aa + ai[i];
1058       vi  = aj + ai[i];
1059       nz  = a->diag[i] - ai[i];
1060       sum = b[r[i]];
1061       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1062       tmp[i] = sum;
1063     }
1064     /* backward solve the upper triangular */
1065     for (i = n - 1; i >= 0; i--) {
1066       v   = aa + a->diag[i] + 1;
1067       vi  = aj + a->diag[i] + 1;
1068       nz  = ai[i + 1] - a->diag[i] - 1;
1069       sum = tmp[i];
1070       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1071       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1072     }
1073     b += ldb;
1074     x += ldx;
1075   }
1076   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1077   PetscCall(ISRestoreIndices(isrow, &rout));
1078   PetscCall(ISRestoreIndices(iscol, &cout));
1079   PetscCall(MatDenseRestoreArrayRead(B, &b));
1080   PetscCall(MatDenseRestoreArray(X, &x));
1081   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1086 {
1087   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1088   IS                 iscol = a->col, isrow = a->row;
1089   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1090   PetscInt           nz, neq, ldb, ldx;
1091   const PetscInt    *rout, *cout, *r, *c;
1092   PetscScalar       *x, *tmp = a->solve_work, sum;
1093   const PetscScalar *b, *aa, *v;
1094   PetscBool          isdense;
1095 
1096   PetscFunctionBegin;
1097   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1098   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1099   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1100   if (X != B) {
1101     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1102     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1103   }
1104   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1105   PetscCall(MatDenseGetArrayRead(B, &b));
1106   PetscCall(MatDenseGetLDA(B, &ldb));
1107   PetscCall(MatDenseGetArray(X, &x));
1108   PetscCall(MatDenseGetLDA(X, &ldx));
1109   PetscCall(ISGetIndices(isrow, &rout));
1110   r = rout;
1111   PetscCall(ISGetIndices(iscol, &cout));
1112   c = cout;
1113   for (neq = 0; neq < B->cmap->n; neq++) {
1114     /* forward solve the lower triangular */
1115     tmp[0] = b[r[0]];
1116     v      = aa;
1117     vi     = aj;
1118     for (i = 1; i < n; i++) {
1119       nz  = ai[i + 1] - ai[i];
1120       sum = b[r[i]];
1121       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1122       tmp[i] = sum;
1123       v += nz;
1124       vi += nz;
1125     }
1126     /* backward solve the upper triangular */
1127     for (i = n - 1; i >= 0; i--) {
1128       v   = aa + adiag[i + 1] + 1;
1129       vi  = aj + adiag[i + 1] + 1;
1130       nz  = adiag[i] - adiag[i + 1] - 1;
1131       sum = tmp[i];
1132       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1133       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1134     }
1135     b += ldb;
1136     x += ldx;
1137   }
1138   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1139   PetscCall(ISRestoreIndices(isrow, &rout));
1140   PetscCall(ISRestoreIndices(iscol, &cout));
1141   PetscCall(MatDenseRestoreArrayRead(B, &b));
1142   PetscCall(MatDenseRestoreArray(X, &x));
1143   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1148 {
1149   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1150   IS                 iscol = a->col, isrow = a->row;
1151   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1152   PetscInt           nz, neq, ldb, ldx;
1153   const PetscInt    *rout, *cout, *r, *c;
1154   PetscScalar       *x, *tmp = a->solve_work, s1;
1155   const PetscScalar *b, *aa, *v;
1156   PetscBool          isdense;
1157 
1158   PetscFunctionBegin;
1159   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1160   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1161   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1162   if (X != B) {
1163     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1164     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1165   }
1166   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1167   PetscCall(MatDenseGetArrayRead(B, &b));
1168   PetscCall(MatDenseGetLDA(B, &ldb));
1169   PetscCall(MatDenseGetArray(X, &x));
1170   PetscCall(MatDenseGetLDA(X, &ldx));
1171   PetscCall(ISGetIndices(isrow, &rout));
1172   r = rout;
1173   PetscCall(ISGetIndices(iscol, &cout));
1174   c = cout;
1175   for (neq = 0; neq < B->cmap->n; neq++) {
1176     /* copy the b into temp work space according to permutation */
1177     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1178 
1179     /* forward solve the U^T */
1180     for (i = 0; i < n; i++) {
1181       v  = aa + adiag[i + 1] + 1;
1182       vi = aj + adiag[i + 1] + 1;
1183       nz = adiag[i] - adiag[i + 1] - 1;
1184       s1 = tmp[i];
1185       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1186       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1187       tmp[i] = s1;
1188     }
1189 
1190     /* backward solve the L^T */
1191     for (i = n - 1; i >= 0; i--) {
1192       v  = aa + ai[i];
1193       vi = aj + ai[i];
1194       nz = ai[i + 1] - ai[i];
1195       s1 = tmp[i];
1196       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1197     }
1198 
1199     /* copy tmp into x according to permutation */
1200     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1201     b += ldb;
1202     x += ldx;
1203   }
1204   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1205   PetscCall(ISRestoreIndices(isrow, &rout));
1206   PetscCall(ISRestoreIndices(iscol, &cout));
1207   PetscCall(MatDenseRestoreArrayRead(B, &b));
1208   PetscCall(MatDenseRestoreArray(X, &x));
1209   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1210   PetscFunctionReturn(PETSC_SUCCESS);
1211 }
1212 
1213 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1214 {
1215   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1216   IS                 iscol = a->col, isrow = a->row;
1217   const PetscInt    *r, *c, *rout, *cout;
1218   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1219   PetscInt           nz, row;
1220   PetscScalar       *x, *tmp, *tmps, sum;
1221   const PetscScalar *b;
1222   const MatScalar   *aa, *v;
1223 
1224   PetscFunctionBegin;
1225   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1226 
1227   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1228   PetscCall(VecGetArrayRead(bb, &b));
1229   PetscCall(VecGetArrayWrite(xx, &x));
1230   tmp = a->solve_work;
1231 
1232   PetscCall(ISGetIndices(isrow, &rout));
1233   r = rout;
1234   PetscCall(ISGetIndices(iscol, &cout));
1235   c = cout + (n - 1);
1236 
1237   /* forward solve the lower triangular */
1238   tmp[0] = b[*r++];
1239   tmps   = tmp;
1240   for (row = 1; row < n; row++) {
1241     i   = rout[row]; /* permuted row */
1242     v   = aa + ai[i];
1243     vi  = aj + ai[i];
1244     nz  = a->diag[i] - ai[i];
1245     sum = b[*r++];
1246     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1247     tmp[row] = sum;
1248   }
1249 
1250   /* backward solve the upper triangular */
1251   for (row = n - 1; row >= 0; row--) {
1252     i   = rout[row]; /* permuted row */
1253     v   = aa + a->diag[i] + 1;
1254     vi  = aj + a->diag[i] + 1;
1255     nz  = ai[i + 1] - a->diag[i] - 1;
1256     sum = tmp[row];
1257     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1258     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1259   }
1260   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1261   PetscCall(ISRestoreIndices(isrow, &rout));
1262   PetscCall(ISRestoreIndices(iscol, &cout));
1263   PetscCall(VecRestoreArrayRead(bb, &b));
1264   PetscCall(VecRestoreArrayWrite(xx, &x));
1265   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1266   PetscFunctionReturn(PETSC_SUCCESS);
1267 }
1268 
1269 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1270 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1271 {
1272   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1273   PetscInt           n  = A->rmap->n;
1274   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1275   PetscScalar       *x;
1276   const PetscScalar *b;
1277   const MatScalar   *aa;
1278 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1279   PetscInt         adiag_i, i, nz, ai_i;
1280   const PetscInt  *vi;
1281   const MatScalar *v;
1282   PetscScalar      sum;
1283 #endif
1284 
1285   PetscFunctionBegin;
1286   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1287 
1288   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1289   PetscCall(VecGetArrayRead(bb, &b));
1290   PetscCall(VecGetArrayWrite(xx, &x));
1291 
1292 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1293   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1294 #else
1295   /* forward solve the lower triangular */
1296   x[0] = b[0];
1297   for (i = 1; i < n; i++) {
1298     ai_i = ai[i];
1299     v    = aa + ai_i;
1300     vi   = aj + ai_i;
1301     nz   = adiag[i] - ai_i;
1302     sum  = b[i];
1303     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1304     x[i] = sum;
1305   }
1306 
1307   /* backward solve the upper triangular */
1308   for (i = n - 1; i >= 0; i--) {
1309     adiag_i = adiag[i];
1310     v       = aa + adiag_i + 1;
1311     vi      = aj + adiag_i + 1;
1312     nz      = ai[i + 1] - adiag_i - 1;
1313     sum     = x[i];
1314     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1315     x[i] = sum * aa[adiag_i];
1316   }
1317 #endif
1318   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1319   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1320   PetscCall(VecRestoreArrayRead(bb, &b));
1321   PetscCall(VecRestoreArrayWrite(xx, &x));
1322   PetscFunctionReturn(PETSC_SUCCESS);
1323 }
1324 
1325 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1326 {
1327   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1328   IS                 iscol = a->col, isrow = a->row;
1329   PetscInt           i, n                  = A->rmap->n, j;
1330   PetscInt           nz;
1331   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1332   PetscScalar       *x, *tmp, sum;
1333   const PetscScalar *b;
1334   const MatScalar   *aa, *v;
1335 
1336   PetscFunctionBegin;
1337   if (yy != xx) PetscCall(VecCopy(yy, xx));
1338 
1339   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1340   PetscCall(VecGetArrayRead(bb, &b));
1341   PetscCall(VecGetArray(xx, &x));
1342   tmp = a->solve_work;
1343 
1344   PetscCall(ISGetIndices(isrow, &rout));
1345   r = rout;
1346   PetscCall(ISGetIndices(iscol, &cout));
1347   c = cout + (n - 1);
1348 
1349   /* forward solve the lower triangular */
1350   tmp[0] = b[*r++];
1351   for (i = 1; i < n; i++) {
1352     v   = aa + ai[i];
1353     vi  = aj + ai[i];
1354     nz  = a->diag[i] - ai[i];
1355     sum = b[*r++];
1356     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1357     tmp[i] = sum;
1358   }
1359 
1360   /* backward solve the upper triangular */
1361   for (i = n - 1; i >= 0; i--) {
1362     v   = aa + a->diag[i] + 1;
1363     vi  = aj + a->diag[i] + 1;
1364     nz  = ai[i + 1] - a->diag[i] - 1;
1365     sum = tmp[i];
1366     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1367     tmp[i] = sum * aa[a->diag[i]];
1368     x[*c--] += tmp[i];
1369   }
1370 
1371   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1372   PetscCall(ISRestoreIndices(isrow, &rout));
1373   PetscCall(ISRestoreIndices(iscol, &cout));
1374   PetscCall(VecRestoreArrayRead(bb, &b));
1375   PetscCall(VecRestoreArray(xx, &x));
1376   PetscCall(PetscLogFlops(2.0 * a->nz));
1377   PetscFunctionReturn(PETSC_SUCCESS);
1378 }
1379 
1380 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1381 {
1382   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1383   IS                 iscol = a->col, isrow = a->row;
1384   PetscInt           i, n                  = A->rmap->n, j;
1385   PetscInt           nz;
1386   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1387   PetscScalar       *x, *tmp, sum;
1388   const PetscScalar *b;
1389   const MatScalar   *aa, *v;
1390 
1391   PetscFunctionBegin;
1392   if (yy != xx) PetscCall(VecCopy(yy, xx));
1393 
1394   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1395   PetscCall(VecGetArrayRead(bb, &b));
1396   PetscCall(VecGetArray(xx, &x));
1397   tmp = a->solve_work;
1398 
1399   PetscCall(ISGetIndices(isrow, &rout));
1400   r = rout;
1401   PetscCall(ISGetIndices(iscol, &cout));
1402   c = cout;
1403 
1404   /* forward solve the lower triangular */
1405   tmp[0] = b[r[0]];
1406   v      = aa;
1407   vi     = aj;
1408   for (i = 1; i < n; i++) {
1409     nz  = ai[i + 1] - ai[i];
1410     sum = b[r[i]];
1411     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1412     tmp[i] = sum;
1413     v += nz;
1414     vi += nz;
1415   }
1416 
1417   /* backward solve the upper triangular */
1418   v  = aa + adiag[n - 1];
1419   vi = aj + adiag[n - 1];
1420   for (i = n - 1; i >= 0; i--) {
1421     nz  = adiag[i] - adiag[i + 1] - 1;
1422     sum = tmp[i];
1423     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1424     tmp[i] = sum * v[nz];
1425     x[c[i]] += tmp[i];
1426     v += nz + 1;
1427     vi += nz + 1;
1428   }
1429 
1430   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1431   PetscCall(ISRestoreIndices(isrow, &rout));
1432   PetscCall(ISRestoreIndices(iscol, &cout));
1433   PetscCall(VecRestoreArrayRead(bb, &b));
1434   PetscCall(VecRestoreArray(xx, &x));
1435   PetscCall(PetscLogFlops(2.0 * a->nz));
1436   PetscFunctionReturn(PETSC_SUCCESS);
1437 }
1438 
1439 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1440 {
1441   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1442   IS                 iscol = a->col, isrow = a->row;
1443   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1444   PetscInt           i, n = A->rmap->n, j;
1445   PetscInt           nz;
1446   PetscScalar       *x, *tmp, s1;
1447   const MatScalar   *aa, *v;
1448   const PetscScalar *b;
1449 
1450   PetscFunctionBegin;
1451   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1452   PetscCall(VecGetArrayRead(bb, &b));
1453   PetscCall(VecGetArrayWrite(xx, &x));
1454   tmp = a->solve_work;
1455 
1456   PetscCall(ISGetIndices(isrow, &rout));
1457   r = rout;
1458   PetscCall(ISGetIndices(iscol, &cout));
1459   c = cout;
1460 
1461   /* copy the b into temp work space according to permutation */
1462   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1463 
1464   /* forward solve the U^T */
1465   for (i = 0; i < n; i++) {
1466     v  = aa + diag[i];
1467     vi = aj + diag[i] + 1;
1468     nz = ai[i + 1] - diag[i] - 1;
1469     s1 = tmp[i];
1470     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1471     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1472     tmp[i] = s1;
1473   }
1474 
1475   /* backward solve the L^T */
1476   for (i = n - 1; i >= 0; i--) {
1477     v  = aa + diag[i] - 1;
1478     vi = aj + diag[i] - 1;
1479     nz = diag[i] - ai[i];
1480     s1 = tmp[i];
1481     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1482   }
1483 
1484   /* copy tmp into x according to permutation */
1485   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1486 
1487   PetscCall(ISRestoreIndices(isrow, &rout));
1488   PetscCall(ISRestoreIndices(iscol, &cout));
1489   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1490   PetscCall(VecRestoreArrayRead(bb, &b));
1491   PetscCall(VecRestoreArrayWrite(xx, &x));
1492 
1493   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1498 {
1499   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1500   IS                 iscol = a->col, isrow = a->row;
1501   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1502   PetscInt           i, n = A->rmap->n, j;
1503   PetscInt           nz;
1504   PetscScalar       *x, *tmp, s1;
1505   const MatScalar   *aa, *v;
1506   const PetscScalar *b;
1507 
1508   PetscFunctionBegin;
1509   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1510   PetscCall(VecGetArrayRead(bb, &b));
1511   PetscCall(VecGetArrayWrite(xx, &x));
1512   tmp = a->solve_work;
1513 
1514   PetscCall(ISGetIndices(isrow, &rout));
1515   r = rout;
1516   PetscCall(ISGetIndices(iscol, &cout));
1517   c = cout;
1518 
1519   /* copy the b into temp work space according to permutation */
1520   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1521 
1522   /* forward solve the U^T */
1523   for (i = 0; i < n; i++) {
1524     v  = aa + adiag[i + 1] + 1;
1525     vi = aj + adiag[i + 1] + 1;
1526     nz = adiag[i] - adiag[i + 1] - 1;
1527     s1 = tmp[i];
1528     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1529     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1530     tmp[i] = s1;
1531   }
1532 
1533   /* backward solve the L^T */
1534   for (i = n - 1; i >= 0; i--) {
1535     v  = aa + ai[i];
1536     vi = aj + ai[i];
1537     nz = ai[i + 1] - ai[i];
1538     s1 = tmp[i];
1539     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1540   }
1541 
1542   /* copy tmp into x according to permutation */
1543   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1544 
1545   PetscCall(ISRestoreIndices(isrow, &rout));
1546   PetscCall(ISRestoreIndices(iscol, &cout));
1547   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1548   PetscCall(VecRestoreArrayRead(bb, &b));
1549   PetscCall(VecRestoreArrayWrite(xx, &x));
1550 
1551   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1552   PetscFunctionReturn(PETSC_SUCCESS);
1553 }
1554 
1555 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1556 {
1557   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1558   IS                 iscol = a->col, isrow = a->row;
1559   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1560   PetscInt           i, n = A->rmap->n, j;
1561   PetscInt           nz;
1562   PetscScalar       *x, *tmp, s1;
1563   const MatScalar   *aa, *v;
1564   const PetscScalar *b;
1565 
1566   PetscFunctionBegin;
1567   if (zz != xx) PetscCall(VecCopy(zz, xx));
1568   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1569   PetscCall(VecGetArrayRead(bb, &b));
1570   PetscCall(VecGetArray(xx, &x));
1571   tmp = a->solve_work;
1572 
1573   PetscCall(ISGetIndices(isrow, &rout));
1574   r = rout;
1575   PetscCall(ISGetIndices(iscol, &cout));
1576   c = cout;
1577 
1578   /* copy the b into temp work space according to permutation */
1579   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1580 
1581   /* forward solve the U^T */
1582   for (i = 0; i < n; i++) {
1583     v  = aa + diag[i];
1584     vi = aj + diag[i] + 1;
1585     nz = ai[i + 1] - diag[i] - 1;
1586     s1 = tmp[i];
1587     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1588     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1589     tmp[i] = s1;
1590   }
1591 
1592   /* backward solve the L^T */
1593   for (i = n - 1; i >= 0; i--) {
1594     v  = aa + diag[i] - 1;
1595     vi = aj + diag[i] - 1;
1596     nz = diag[i] - ai[i];
1597     s1 = tmp[i];
1598     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1599   }
1600 
1601   /* copy tmp into x according to permutation */
1602   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1603 
1604   PetscCall(ISRestoreIndices(isrow, &rout));
1605   PetscCall(ISRestoreIndices(iscol, &cout));
1606   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1607   PetscCall(VecRestoreArrayRead(bb, &b));
1608   PetscCall(VecRestoreArray(xx, &x));
1609 
1610   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1611   PetscFunctionReturn(PETSC_SUCCESS);
1612 }
1613 
1614 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1615 {
1616   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1617   IS                 iscol = a->col, isrow = a->row;
1618   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1619   PetscInt           i, n = A->rmap->n, j;
1620   PetscInt           nz;
1621   PetscScalar       *x, *tmp, s1;
1622   const MatScalar   *aa, *v;
1623   const PetscScalar *b;
1624 
1625   PetscFunctionBegin;
1626   if (zz != xx) PetscCall(VecCopy(zz, xx));
1627   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1628   PetscCall(VecGetArrayRead(bb, &b));
1629   PetscCall(VecGetArray(xx, &x));
1630   tmp = a->solve_work;
1631 
1632   PetscCall(ISGetIndices(isrow, &rout));
1633   r = rout;
1634   PetscCall(ISGetIndices(iscol, &cout));
1635   c = cout;
1636 
1637   /* copy the b into temp work space according to permutation */
1638   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1639 
1640   /* forward solve the U^T */
1641   for (i = 0; i < n; i++) {
1642     v  = aa + adiag[i + 1] + 1;
1643     vi = aj + adiag[i + 1] + 1;
1644     nz = adiag[i] - adiag[i + 1] - 1;
1645     s1 = tmp[i];
1646     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1647     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1648     tmp[i] = s1;
1649   }
1650 
1651   /* backward solve the L^T */
1652   for (i = n - 1; i >= 0; i--) {
1653     v  = aa + ai[i];
1654     vi = aj + ai[i];
1655     nz = ai[i + 1] - ai[i];
1656     s1 = tmp[i];
1657     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1658   }
1659 
1660   /* copy tmp into x according to permutation */
1661   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1662 
1663   PetscCall(ISRestoreIndices(isrow, &rout));
1664   PetscCall(ISRestoreIndices(iscol, &cout));
1665   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1666   PetscCall(VecRestoreArrayRead(bb, &b));
1667   PetscCall(VecRestoreArray(xx, &x));
1668 
1669   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 /*
1674    ilu() under revised new data structure.
1675    Factored arrays bj and ba are stored as
1676      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1677 
1678    bi=fact->i is an array of size n+1, in which
1679      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1680      bi[n]:  points to L(n-1,n-1)+1
1681 
1682   bdiag=fact->diag is an array of size n+1,in which
1683      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1684      bdiag[n]: points to entry of U(n-1,0)-1
1685 
1686    U(i,:) contains bdiag[i] as its last entry, i.e.,
1687     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1688 */
1689 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1690 {
1691   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1692   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1693   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1694   IS             isicol;
1695 
1696   PetscFunctionBegin;
1697   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1698   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1699   b = (Mat_SeqAIJ *)fact->data;
1700 
1701   /* allocate matrix arrays for new data structure */
1702   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1703   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1704   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1705   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1706   b->free_a  = PETSC_TRUE;
1707   b->free_ij = PETSC_TRUE;
1708 
1709   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1710   bdiag = b->diag;
1711 
1712   /* set bi and bj with new data structure */
1713   bi = b->i;
1714   bj = b->j;
1715 
1716   /* L part */
1717   bi[0] = 0;
1718   for (i = 0; i < n; i++) {
1719     nz        = adiag[i] - ai[i];
1720     bi[i + 1] = bi[i] + nz;
1721     aj        = a->j + ai[i];
1722     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1723   }
1724 
1725   /* U part */
1726   bdiag[n] = bi[n] - 1;
1727   for (i = n - 1; i >= 0; i--) {
1728     nz = ai[i + 1] - adiag[i] - 1;
1729     aj = a->j + adiag[i] + 1;
1730     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1731     /* diag[i] */
1732     bj[k++]  = i;
1733     bdiag[i] = bdiag[i + 1] + nz + 1;
1734   }
1735 
1736   fact->factortype             = MAT_FACTOR_ILU;
1737   fact->info.factor_mallocs    = 0;
1738   fact->info.fill_ratio_given  = info->fill;
1739   fact->info.fill_ratio_needed = 1.0;
1740   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1741   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1742 
1743   b       = (Mat_SeqAIJ *)fact->data;
1744   b->row  = isrow;
1745   b->col  = iscol;
1746   b->icol = isicol;
1747   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1748   PetscCall(PetscObjectReference((PetscObject)isrow));
1749   PetscCall(PetscObjectReference((PetscObject)iscol));
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1754 {
1755   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1756   IS                 isicol;
1757   const PetscInt    *r, *ic;
1758   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1759   PetscInt          *bi, *cols, nnz, *cols_lvl;
1760   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1761   PetscInt           i, levels, diagonal_fill;
1762   PetscBool          col_identity, row_identity, missing;
1763   PetscReal          f;
1764   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1765   PetscBT            lnkbt;
1766   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1767   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1768   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1769 
1770   PetscFunctionBegin;
1771   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1772   PetscCall(MatMissingDiagonal(A, &missing, &i));
1773   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1774 
1775   levels = (PetscInt)info->levels;
1776   PetscCall(ISIdentity(isrow, &row_identity));
1777   PetscCall(ISIdentity(iscol, &col_identity));
1778   if (!levels && row_identity && col_identity) {
1779     /* special case: ilu(0) with natural ordering */
1780     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1781     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1782     PetscFunctionReturn(PETSC_SUCCESS);
1783   }
1784 
1785   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1786   PetscCall(ISGetIndices(isrow, &r));
1787   PetscCall(ISGetIndices(isicol, &ic));
1788 
1789   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1790   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1791   PetscCall(PetscMalloc1(n + 1, &bdiag));
1792   bi[0] = bdiag[0] = 0;
1793   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1794 
1795   /* create a linked list for storing column indices of the active row */
1796   nlnk = n + 1;
1797   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1798 
1799   /* initial FreeSpace size is f*(ai[n]+1) */
1800   f             = info->fill;
1801   diagonal_fill = (PetscInt)info->diagonal_fill;
1802   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1803   current_space = free_space;
1804   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1805   current_space_lvl = free_space_lvl;
1806   for (i = 0; i < n; i++) {
1807     nzi = 0;
1808     /* copy current row into linked list */
1809     nnz = ai[r[i] + 1] - ai[r[i]];
1810     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1811     cols   = aj + ai[r[i]];
1812     lnk[i] = -1; /* marker to indicate if diagonal exists */
1813     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1814     nzi += nlnk;
1815 
1816     /* make sure diagonal entry is included */
1817     if (diagonal_fill && lnk[i] == -1) {
1818       fm = n;
1819       while (lnk[fm] < i) fm = lnk[fm];
1820       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1821       lnk[fm]    = i;
1822       lnk_lvl[i] = 0;
1823       nzi++;
1824       dcount++;
1825     }
1826 
1827     /* add pivot rows into the active row */
1828     nzbd = 0;
1829     prow = lnk[n];
1830     while (prow < i) {
1831       nnz      = bdiag[prow];
1832       cols     = bj_ptr[prow] + nnz + 1;
1833       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1834       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1835       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1836       nzi += nlnk;
1837       prow = lnk[prow];
1838       nzbd++;
1839     }
1840     bdiag[i]  = nzbd;
1841     bi[i + 1] = bi[i] + nzi;
1842     /* if free space is not available, make more free space */
1843     if (current_space->local_remaining < nzi) {
1844       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1845       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1846       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1847       reallocs++;
1848     }
1849 
1850     /* copy data into free_space and free_space_lvl, then initialize lnk */
1851     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1852     bj_ptr[i]    = current_space->array;
1853     bjlvl_ptr[i] = current_space_lvl->array;
1854 
1855     /* make sure the active row i has diagonal entry */
1856     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1857 
1858     current_space->array += nzi;
1859     current_space->local_used += nzi;
1860     current_space->local_remaining -= nzi;
1861     current_space_lvl->array += nzi;
1862     current_space_lvl->local_used += nzi;
1863     current_space_lvl->local_remaining -= nzi;
1864   }
1865 
1866   PetscCall(ISRestoreIndices(isrow, &r));
1867   PetscCall(ISRestoreIndices(isicol, &ic));
1868   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1869   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1870   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1871 
1872   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1873   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1874   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1875 
1876 #if defined(PETSC_USE_INFO)
1877   {
1878     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1879     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1880     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1881     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1882     PetscCall(PetscInfo(A, "for best performance.\n"));
1883     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1884   }
1885 #endif
1886   /* put together the new matrix */
1887   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1888   b          = (Mat_SeqAIJ *)fact->data;
1889   b->free_ij = PETSC_TRUE;
1890   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1891   b->free_a = PETSC_TRUE;
1892   b->j      = bj;
1893   b->i      = bi;
1894   b->diag   = bdiag;
1895   b->ilen   = NULL;
1896   b->imax   = NULL;
1897   b->row    = isrow;
1898   b->col    = iscol;
1899   PetscCall(PetscObjectReference((PetscObject)isrow));
1900   PetscCall(PetscObjectReference((PetscObject)iscol));
1901   b->icol = isicol;
1902 
1903   PetscCall(PetscMalloc1(n, &b->solve_work));
1904   /* In b structure:  Free imax, ilen, old a, old j.
1905      Allocate bdiag, solve_work, new a, new j */
1906   b->maxnz = b->nz = bdiag[0] + 1;
1907 
1908   fact->info.factor_mallocs    = reallocs;
1909   fact->info.fill_ratio_given  = f;
1910   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1911   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1912   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1913   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 #if 0
1918 // unused
1919 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1920 {
1921   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1922   IS                 isicol;
1923   const PetscInt    *r, *ic;
1924   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1925   PetscInt          *bi, *cols, nnz, *cols_lvl;
1926   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1927   PetscInt           i, levels, diagonal_fill;
1928   PetscBool          col_identity, row_identity;
1929   PetscReal          f;
1930   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1931   PetscBT            lnkbt;
1932   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1933   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1934   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1935   PetscBool          missing;
1936 
1937   PetscFunctionBegin;
1938   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1939   PetscCall(MatMissingDiagonal(A, &missing, &i));
1940   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1941 
1942   f             = info->fill;
1943   levels        = (PetscInt)info->levels;
1944   diagonal_fill = (PetscInt)info->diagonal_fill;
1945 
1946   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1947 
1948   PetscCall(ISIdentity(isrow, &row_identity));
1949   PetscCall(ISIdentity(iscol, &col_identity));
1950   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1951     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1952 
1953     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1954     if (a->inode.size_csr) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1955     fact->factortype               = MAT_FACTOR_ILU;
1956     (fact)->info.factor_mallocs    = 0;
1957     (fact)->info.fill_ratio_given  = info->fill;
1958     (fact)->info.fill_ratio_needed = 1.0;
1959 
1960     b       = (Mat_SeqAIJ *)fact->data;
1961     b->row  = isrow;
1962     b->col  = iscol;
1963     b->icol = isicol;
1964     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1965     PetscCall(PetscObjectReference((PetscObject)isrow));
1966     PetscCall(PetscObjectReference((PetscObject)iscol));
1967     PetscFunctionReturn(PETSC_SUCCESS);
1968   }
1969 
1970   PetscCall(ISGetIndices(isrow, &r));
1971   PetscCall(ISGetIndices(isicol, &ic));
1972 
1973   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1974   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1975   PetscCall(PetscMalloc1(n + 1, &bdiag));
1976   bi[0] = bdiag[0] = 0;
1977 
1978   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1979 
1980   /* create a linked list for storing column indices of the active row */
1981   nlnk = n + 1;
1982   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1983 
1984   /* initial FreeSpace size is f*(ai[n]+1) */
1985   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1986   current_space = free_space;
1987   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1988   current_space_lvl = free_space_lvl;
1989 
1990   for (i = 0; i < n; i++) {
1991     nzi = 0;
1992     /* copy current row into linked list */
1993     nnz = ai[r[i] + 1] - ai[r[i]];
1994     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1995     cols   = aj + ai[r[i]];
1996     lnk[i] = -1; /* marker to indicate if diagonal exists */
1997     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1998     nzi += nlnk;
1999 
2000     /* make sure diagonal entry is included */
2001     if (diagonal_fill && lnk[i] == -1) {
2002       fm = n;
2003       while (lnk[fm] < i) fm = lnk[fm];
2004       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
2005       lnk[fm]    = i;
2006       lnk_lvl[i] = 0;
2007       nzi++;
2008       dcount++;
2009     }
2010 
2011     /* add pivot rows into the active row */
2012     nzbd = 0;
2013     prow = lnk[n];
2014     while (prow < i) {
2015       nnz      = bdiag[prow];
2016       cols     = bj_ptr[prow] + nnz + 1;
2017       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2018       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
2019       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
2020       nzi += nlnk;
2021       prow = lnk[prow];
2022       nzbd++;
2023     }
2024     bdiag[i]  = nzbd;
2025     bi[i + 1] = bi[i] + nzi;
2026 
2027     /* if free space is not available, make more free space */
2028     if (current_space->local_remaining < nzi) {
2029       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
2030       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2031       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
2032       reallocs++;
2033     }
2034 
2035     /* copy data into free_space and free_space_lvl, then initialize lnk */
2036     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2037     bj_ptr[i]    = current_space->array;
2038     bjlvl_ptr[i] = current_space_lvl->array;
2039 
2040     /* make sure the active row i has diagonal entry */
2041     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
2042 
2043     current_space->array += nzi;
2044     current_space->local_used += nzi;
2045     current_space->local_remaining -= nzi;
2046     current_space_lvl->array += nzi;
2047     current_space_lvl->local_used += nzi;
2048     current_space_lvl->local_remaining -= nzi;
2049   }
2050 
2051   PetscCall(ISRestoreIndices(isrow, &r));
2052   PetscCall(ISRestoreIndices(isicol, &ic));
2053 
2054   /* destroy list of free space and other temporary arrays */
2055   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2056   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2057   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2058   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2059   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
2060 
2061   #if defined(PETSC_USE_INFO)
2062   {
2063     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2064     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2065     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2066     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2067     PetscCall(PetscInfo(A, "for best performance.\n"));
2068     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2069   }
2070   #endif
2071 
2072   /* put together the new matrix */
2073   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2074   b = (Mat_SeqAIJ *)fact->data;
2075   b->free_ij      = PETSC_TRUE;
2076   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2077   b->free_a       = PETSC_TRUE;
2078   b->j = bj;
2079   b->i = bi;
2080   for (i = 0; i < n; i++) bdiag[i] += bi[i];
2081   b->diag = bdiag;
2082   b->ilen = NULL;
2083   b->imax = NULL;
2084   b->row  = isrow;
2085   b->col  = iscol;
2086   PetscCall(PetscObjectReference((PetscObject)isrow));
2087   PetscCall(PetscObjectReference((PetscObject)iscol));
2088   b->icol = isicol;
2089   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2090   /* In b structure:  Free imax, ilen, old a, old j.
2091      Allocate bdiag, solve_work, new a, new j */
2092   b->maxnz = b->nz = bi[n];
2093 
2094   fact->info.factor_mallocs    = reallocs;
2095   fact->info.fill_ratio_given  = f;
2096   fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2097   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2098   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2099   PetscFunctionReturn(PETSC_SUCCESS);
2100 }
2101 #endif
2102 
2103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2104 {
2105   Mat              C  = B;
2106   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2107   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
2108   IS               ip = b->row, iip = b->icol;
2109   const PetscInt  *rip, *riip;
2110   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2111   PetscInt        *ai = a->i, *aj = a->j;
2112   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2113   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
2114   PetscBool        perm_identity;
2115   FactorShiftCtx   sctx;
2116   PetscReal        rs;
2117   const MatScalar *aa, *v;
2118   MatScalar        d;
2119 
2120   PetscFunctionBegin;
2121   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2122   /* MatPivotSetUp(): initialize shift context sctx */
2123   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2124 
2125   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2126     sctx.shift_top = info->zeropivot;
2127     for (i = 0; i < mbs; i++) {
2128       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2129       d  = (aa)[a->diag[i]];
2130       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2131       v  = aa + ai[i];
2132       nz = ai[i + 1] - ai[i];
2133       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2134       if (rs > sctx.shift_top) sctx.shift_top = rs;
2135     }
2136     sctx.shift_top *= 1.1;
2137     sctx.nshift_max = 5;
2138     sctx.shift_lo   = 0.;
2139     sctx.shift_hi   = 1.;
2140   }
2141 
2142   PetscCall(ISGetIndices(ip, &rip));
2143   PetscCall(ISGetIndices(iip, &riip));
2144 
2145   /* allocate working arrays
2146      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2147      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2148   */
2149   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2150 
2151   do {
2152     sctx.newshift = PETSC_FALSE;
2153 
2154     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2155     if (mbs) il[0] = 0;
2156 
2157     for (k = 0; k < mbs; k++) {
2158       /* zero rtmp */
2159       nz    = bi[k + 1] - bi[k];
2160       bjtmp = bj + bi[k];
2161       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2162 
2163       /* load in initial unfactored row */
2164       bval = ba + bi[k];
2165       jmin = ai[rip[k]];
2166       jmax = ai[rip[k] + 1];
2167       for (j = jmin; j < jmax; j++) {
2168         col = riip[aj[j]];
2169         if (col >= k) { /* only take upper triangular entry */
2170           rtmp[col] = aa[j];
2171           *bval++   = 0.0; /* for in-place factorization */
2172         }
2173       }
2174       /* shift the diagonal of the matrix: ZeropivotApply() */
2175       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2176 
2177       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2178       dk = rtmp[k];
2179       i  = c2r[k]; /* first row to be added to k_th row  */
2180 
2181       while (i < k) {
2182         nexti = c2r[i]; /* next row to be added to k_th row */
2183 
2184         /* compute multiplier, update diag(k) and U(i,k) */
2185         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2186         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2187         dk += uikdi * ba[ili];           /* update diag[k] */
2188         ba[ili] = uikdi;                 /* -U(i,k) */
2189 
2190         /* add multiple of row i to k-th row */
2191         jmin = ili + 1;
2192         jmax = bi[i + 1];
2193         if (jmin < jmax) {
2194           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2195           /* update il and c2r for row i */
2196           il[i]  = jmin;
2197           j      = bj[jmin];
2198           c2r[i] = c2r[j];
2199           c2r[j] = i;
2200         }
2201         i = nexti;
2202       }
2203 
2204       /* copy data into U(k,:) */
2205       rs   = 0.0;
2206       jmin = bi[k];
2207       jmax = bi[k + 1] - 1;
2208       if (jmin < jmax) {
2209         for (j = jmin; j < jmax; j++) {
2210           col   = bj[j];
2211           ba[j] = rtmp[col];
2212           rs += PetscAbsScalar(ba[j]);
2213         }
2214         /* add the k-th row into il and c2r */
2215         il[k]  = jmin;
2216         i      = bj[jmin];
2217         c2r[k] = c2r[i];
2218         c2r[i] = k;
2219       }
2220 
2221       /* MatPivotCheck() */
2222       sctx.rs = rs;
2223       sctx.pv = dk;
2224       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2225       if (sctx.newshift) break;
2226       dk = sctx.pv;
2227 
2228       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2229     }
2230   } while (sctx.newshift);
2231 
2232   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2233   PetscCall(PetscFree3(rtmp, il, c2r));
2234   PetscCall(ISRestoreIndices(ip, &rip));
2235   PetscCall(ISRestoreIndices(iip, &riip));
2236 
2237   PetscCall(ISIdentity(ip, &perm_identity));
2238   if (perm_identity) {
2239     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2240     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2241     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2242     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2243   } else {
2244     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2245     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2246     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2247     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2248   }
2249 
2250   C->assembled    = PETSC_TRUE;
2251   C->preallocated = PETSC_TRUE;
2252 
2253   PetscCall(PetscLogFlops(C->rmap->n));
2254 
2255   /* MatPivotView() */
2256   if (sctx.nshift) {
2257     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2258       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2259     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2260       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2261     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2262       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2263     }
2264   }
2265   PetscFunctionReturn(PETSC_SUCCESS);
2266 }
2267 
2268 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2269 {
2270   Mat              C  = B;
2271   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2272   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
2273   IS               ip = b->row, iip = b->icol;
2274   const PetscInt  *rip, *riip;
2275   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2276   PetscInt        *ai = a->i, *aj = a->j;
2277   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2278   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
2279   const MatScalar *aa, *v;
2280   PetscBool        perm_identity;
2281   FactorShiftCtx   sctx;
2282   PetscReal        rs;
2283   MatScalar        d;
2284 
2285   PetscFunctionBegin;
2286   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2287   /* MatPivotSetUp(): initialize shift context sctx */
2288   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2289 
2290   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2291     sctx.shift_top = info->zeropivot;
2292     for (i = 0; i < mbs; i++) {
2293       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2294       d  = (aa)[a->diag[i]];
2295       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2296       v  = aa + ai[i];
2297       nz = ai[i + 1] - ai[i];
2298       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2299       if (rs > sctx.shift_top) sctx.shift_top = rs;
2300     }
2301     sctx.shift_top *= 1.1;
2302     sctx.nshift_max = 5;
2303     sctx.shift_lo   = 0.;
2304     sctx.shift_hi   = 1.;
2305   }
2306 
2307   PetscCall(ISGetIndices(ip, &rip));
2308   PetscCall(ISGetIndices(iip, &riip));
2309 
2310   /* initialization */
2311   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2312 
2313   do {
2314     sctx.newshift = PETSC_FALSE;
2315 
2316     for (i = 0; i < mbs; i++) jl[i] = mbs;
2317     il[0] = 0;
2318 
2319     for (k = 0; k < mbs; k++) {
2320       /* zero rtmp */
2321       nz    = bi[k + 1] - bi[k];
2322       bjtmp = bj + bi[k];
2323       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2324 
2325       bval = ba + bi[k];
2326       /* initialize k-th row by the perm[k]-th row of A */
2327       jmin = ai[rip[k]];
2328       jmax = ai[rip[k] + 1];
2329       for (j = jmin; j < jmax; j++) {
2330         col = riip[aj[j]];
2331         if (col >= k) { /* only take upper triangular entry */
2332           rtmp[col] = aa[j];
2333           *bval++   = 0.0; /* for in-place factorization */
2334         }
2335       }
2336       /* shift the diagonal of the matrix */
2337       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2338 
2339       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2340       dk = rtmp[k];
2341       i  = jl[k]; /* first row to be added to k_th row  */
2342 
2343       while (i < k) {
2344         nexti = jl[i]; /* next row to be added to k_th row */
2345 
2346         /* compute multiplier, update diag(k) and U(i,k) */
2347         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2348         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2349         dk += uikdi * ba[ili];
2350         ba[ili] = uikdi; /* -U(i,k) */
2351 
2352         /* add multiple of row i to k-th row */
2353         jmin = ili + 1;
2354         jmax = bi[i + 1];
2355         if (jmin < jmax) {
2356           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2357           /* update il and jl for row i */
2358           il[i] = jmin;
2359           j     = bj[jmin];
2360           jl[i] = jl[j];
2361           jl[j] = i;
2362         }
2363         i = nexti;
2364       }
2365 
2366       /* shift the diagonals when zero pivot is detected */
2367       /* compute rs=sum of abs(off-diagonal) */
2368       rs   = 0.0;
2369       jmin = bi[k] + 1;
2370       nz   = bi[k + 1] - jmin;
2371       bcol = bj + jmin;
2372       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2373 
2374       sctx.rs = rs;
2375       sctx.pv = dk;
2376       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2377       if (sctx.newshift) break;
2378       dk = sctx.pv;
2379 
2380       /* copy data into U(k,:) */
2381       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2382       jmin      = bi[k] + 1;
2383       jmax      = bi[k + 1];
2384       if (jmin < jmax) {
2385         for (j = jmin; j < jmax; j++) {
2386           col   = bj[j];
2387           ba[j] = rtmp[col];
2388         }
2389         /* add the k-th row into il and jl */
2390         il[k] = jmin;
2391         i     = bj[jmin];
2392         jl[k] = jl[i];
2393         jl[i] = k;
2394       }
2395     }
2396   } while (sctx.newshift);
2397 
2398   PetscCall(PetscFree3(rtmp, il, jl));
2399   PetscCall(ISRestoreIndices(ip, &rip));
2400   PetscCall(ISRestoreIndices(iip, &riip));
2401   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2402 
2403   PetscCall(ISIdentity(ip, &perm_identity));
2404   if (perm_identity) {
2405     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2406     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2407     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2408     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2409   } else {
2410     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2411     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2412     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2413     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2414   }
2415 
2416   C->assembled    = PETSC_TRUE;
2417   C->preallocated = PETSC_TRUE;
2418 
2419   PetscCall(PetscLogFlops(C->rmap->n));
2420   if (sctx.nshift) {
2421     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2422       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2423     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2424       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2425     }
2426   }
2427   PetscFunctionReturn(PETSC_SUCCESS);
2428 }
2429 
2430 /*
2431    icc() under revised new data structure.
2432    Factored arrays bj and ba are stored as
2433      U(0,:),...,U(i,:),U(n-1,:)
2434 
2435    ui=fact->i is an array of size n+1, in which
2436    ui+
2437      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2438      ui[n]:  points to U(n-1,n-1)+1
2439 
2440   udiag=fact->diag is an array of size n,in which
2441      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2442 
2443    U(i,:) contains udiag[i] as its last entry, i.e.,
2444     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2445 */
2446 
2447 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2448 {
2449   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2450   Mat_SeqSBAIJ      *b;
2451   PetscBool          perm_identity, missing;
2452   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2453   const PetscInt    *rip, *riip;
2454   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2455   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2456   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2457   PetscReal          fill = info->fill, levels = info->levels;
2458   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2459   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2460   PetscBT            lnkbt;
2461   IS                 iperm;
2462 
2463   PetscFunctionBegin;
2464   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2465   PetscCall(MatMissingDiagonal(A, &missing, &d));
2466   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2467   PetscCall(ISIdentity(perm, &perm_identity));
2468   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2469 
2470   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2471   PetscCall(PetscMalloc1(am + 1, &udiag));
2472   ui[0] = 0;
2473 
2474   /* ICC(0) without matrix ordering: simply rearrange column indices */
2475   if (!levels && perm_identity) {
2476     for (i = 0; i < am; i++) {
2477       ncols     = ai[i + 1] - a->diag[i];
2478       ui[i + 1] = ui[i] + ncols;
2479       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2480     }
2481     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2482     cols = uj;
2483     for (i = 0; i < am; i++) {
2484       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2485       ncols = ai[i + 1] - a->diag[i] - 1;
2486       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2487       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2488     }
2489   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2490     PetscCall(ISGetIndices(iperm, &riip));
2491     PetscCall(ISGetIndices(perm, &rip));
2492 
2493     /* initialization */
2494     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2495 
2496     /* jl: linked list for storing indices of the pivot rows
2497        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2498     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2499     for (i = 0; i < am; i++) {
2500       jl[i] = am;
2501       il[i] = 0;
2502     }
2503 
2504     /* create and initialize a linked list for storing column indices of the active row k */
2505     nlnk = am + 1;
2506     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2507 
2508     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2509     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2510     current_space = free_space;
2511     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2512     current_space_lvl = free_space_lvl;
2513 
2514     for (k = 0; k < am; k++) { /* for each active row k */
2515       /* initialize lnk by the column indices of row rip[k] of A */
2516       nzk   = 0;
2517       ncols = ai[rip[k] + 1] - ai[rip[k]];
2518       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2519       ncols_upper = 0;
2520       for (j = 0; j < ncols; j++) {
2521         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2522         if (riip[i] >= k) {         /* only take upper triangular entry */
2523           ajtmp[ncols_upper] = i;
2524           ncols_upper++;
2525         }
2526       }
2527       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2528       nzk += nlnk;
2529 
2530       /* update lnk by computing fill-in for each pivot row to be merged in */
2531       prow = jl[k]; /* 1st pivot row */
2532 
2533       while (prow < k) {
2534         nextprow = jl[prow];
2535 
2536         /* merge prow into k-th row */
2537         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2538         jmax  = ui[prow + 1];
2539         ncols = jmax - jmin;
2540         i     = jmin - ui[prow];
2541         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2542         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2543         j     = *(uj - 1);
2544         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2545         nzk += nlnk;
2546 
2547         /* update il and jl for prow */
2548         if (jmin < jmax) {
2549           il[prow] = jmin;
2550           j        = *cols;
2551           jl[prow] = jl[j];
2552           jl[j]    = prow;
2553         }
2554         prow = nextprow;
2555       }
2556 
2557       /* if free space is not available, make more free space */
2558       if (current_space->local_remaining < nzk) {
2559         i = am - k + 1;                                    /* num of unfactored rows */
2560         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2561         PetscCall(PetscFreeSpaceGet(i, &current_space));
2562         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2563         reallocs++;
2564       }
2565 
2566       /* copy data into free_space and free_space_lvl, then initialize lnk */
2567       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2568       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2569 
2570       /* add the k-th row into il and jl */
2571       if (nzk > 1) {
2572         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2573         jl[k] = jl[i];
2574         jl[i] = k;
2575         il[k] = ui[k] + 1;
2576       }
2577       uj_ptr[k]     = current_space->array;
2578       uj_lvl_ptr[k] = current_space_lvl->array;
2579 
2580       current_space->array += nzk;
2581       current_space->local_used += nzk;
2582       current_space->local_remaining -= nzk;
2583 
2584       current_space_lvl->array += nzk;
2585       current_space_lvl->local_used += nzk;
2586       current_space_lvl->local_remaining -= nzk;
2587 
2588       ui[k + 1] = ui[k] + nzk;
2589     }
2590 
2591     PetscCall(ISRestoreIndices(perm, &rip));
2592     PetscCall(ISRestoreIndices(iperm, &riip));
2593     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2594     PetscCall(PetscFree(ajtmp));
2595 
2596     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2597     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2598     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2599     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2600     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2601 
2602   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2603 
2604   /* put together the new matrix in MATSEQSBAIJ format */
2605   b          = (Mat_SeqSBAIJ *)fact->data;
2606   b->free_ij = PETSC_TRUE;
2607   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2608   b->free_a    = PETSC_TRUE;
2609   b->j         = uj;
2610   b->i         = ui;
2611   b->diag      = udiag;
2612   b->free_diag = PETSC_TRUE;
2613   b->ilen      = NULL;
2614   b->imax      = NULL;
2615   b->row       = perm;
2616   b->col       = perm;
2617   PetscCall(PetscObjectReference((PetscObject)perm));
2618   PetscCall(PetscObjectReference((PetscObject)perm));
2619   b->icol          = iperm;
2620   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2621 
2622   PetscCall(PetscMalloc1(am, &b->solve_work));
2623 
2624   b->maxnz = b->nz = ui[am];
2625 
2626   fact->info.factor_mallocs   = reallocs;
2627   fact->info.fill_ratio_given = fill;
2628   if (ai[am] != 0) {
2629     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2630     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2631   } else {
2632     fact->info.fill_ratio_needed = 0.0;
2633   }
2634 #if defined(PETSC_USE_INFO)
2635   if (ai[am] != 0) {
2636     PetscReal af = fact->info.fill_ratio_needed;
2637     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2638     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2639     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2640   } else {
2641     PetscCall(PetscInfo(A, "Empty matrix\n"));
2642   }
2643 #endif
2644   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2645   PetscFunctionReturn(PETSC_SUCCESS);
2646 }
2647 
2648 #if 0
2649 // unused
2650 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2651 {
2652   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2653   Mat_SeqSBAIJ      *b;
2654   PetscBool          perm_identity, missing;
2655   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2656   const PetscInt    *rip, *riip;
2657   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2658   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2659   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2660   PetscReal          fill = info->fill, levels = info->levels;
2661   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2662   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2663   PetscBT            lnkbt;
2664   IS                 iperm;
2665 
2666   PetscFunctionBegin;
2667   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2668   PetscCall(MatMissingDiagonal(A, &missing, &d));
2669   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2670   PetscCall(ISIdentity(perm, &perm_identity));
2671   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2672 
2673   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
2674   PetscCall(PetscMalloc1(am + 1, &udiag));
2675   ui[0] = 0;
2676 
2677   /* ICC(0) without matrix ordering: simply copies fill pattern */
2678   if (!levels && perm_identity) {
2679     for (i = 0; i < am; i++) {
2680       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2681       udiag[i]  = ui[i];
2682     }
2683     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2684     cols = uj;
2685     for (i = 0; i < am; i++) {
2686       aj    = a->j + a->diag[i];
2687       ncols = ui[i + 1] - ui[i];
2688       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2689     }
2690   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2691     PetscCall(ISGetIndices(iperm, &riip));
2692     PetscCall(ISGetIndices(perm, &rip));
2693 
2694     /* initialization */
2695     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2696 
2697     /* jl: linked list for storing indices of the pivot rows
2698        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2699     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2700     for (i = 0; i < am; i++) {
2701       jl[i] = am;
2702       il[i] = 0;
2703     }
2704 
2705     /* create and initialize a linked list for storing column indices of the active row k */
2706     nlnk = am + 1;
2707     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2708 
2709     /* initial FreeSpace size is fill*(ai[am]+1) */
2710     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2711     current_space = free_space;
2712     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2713     current_space_lvl = free_space_lvl;
2714 
2715     for (k = 0; k < am; k++) { /* for each active row k */
2716       /* initialize lnk by the column indices of row rip[k] of A */
2717       nzk   = 0;
2718       ncols = ai[rip[k] + 1] - ai[rip[k]];
2719       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2720       ncols_upper = 0;
2721       for (j = 0; j < ncols; j++) {
2722         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2723         if (riip[i] >= k) {         /* only take upper triangular entry */
2724           ajtmp[ncols_upper] = i;
2725           ncols_upper++;
2726         }
2727       }
2728       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2729       nzk += nlnk;
2730 
2731       /* update lnk by computing fill-in for each pivot row to be merged in */
2732       prow = jl[k]; /* 1st pivot row */
2733 
2734       while (prow < k) {
2735         nextprow = jl[prow];
2736 
2737         /* merge prow into k-th row */
2738         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2739         jmax  = ui[prow + 1];
2740         ncols = jmax - jmin;
2741         i     = jmin - ui[prow];
2742         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2743         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2744         j     = *(uj - 1);
2745         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2746         nzk += nlnk;
2747 
2748         /* update il and jl for prow */
2749         if (jmin < jmax) {
2750           il[prow] = jmin;
2751           j        = *cols;
2752           jl[prow] = jl[j];
2753           jl[j]    = prow;
2754         }
2755         prow = nextprow;
2756       }
2757 
2758       /* if free space is not available, make more free space */
2759       if (current_space->local_remaining < nzk) {
2760         i = am - k + 1;                                    /* num of unfactored rows */
2761         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2762         PetscCall(PetscFreeSpaceGet(i, &current_space));
2763         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2764         reallocs++;
2765       }
2766 
2767       /* copy data into free_space and free_space_lvl, then initialize lnk */
2768       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2769       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2770 
2771       /* add the k-th row into il and jl */
2772       if (nzk > 1) {
2773         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2774         jl[k] = jl[i];
2775         jl[i] = k;
2776         il[k] = ui[k] + 1;
2777       }
2778       uj_ptr[k]     = current_space->array;
2779       uj_lvl_ptr[k] = current_space_lvl->array;
2780 
2781       current_space->array += nzk;
2782       current_space->local_used += nzk;
2783       current_space->local_remaining -= nzk;
2784 
2785       current_space_lvl->array += nzk;
2786       current_space_lvl->local_used += nzk;
2787       current_space_lvl->local_remaining -= nzk;
2788 
2789       ui[k + 1] = ui[k] + nzk;
2790     }
2791 
2792   #if defined(PETSC_USE_INFO)
2793     if (ai[am] != 0) {
2794       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2795       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2796       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2797       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2798     } else {
2799       PetscCall(PetscInfo(A, "Empty matrix\n"));
2800     }
2801   #endif
2802 
2803     PetscCall(ISRestoreIndices(perm, &rip));
2804     PetscCall(ISRestoreIndices(iperm, &riip));
2805     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2806     PetscCall(PetscFree(ajtmp));
2807 
2808     /* destroy list of free space and other temporary array(s) */
2809     PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
2810     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2811     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2812     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2813 
2814   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2815 
2816   /* put together the new matrix in MATSEQSBAIJ format */
2817 
2818   b               = (Mat_SeqSBAIJ *)fact->data;
2819   b->free_ij       = PETSC_TRUE;
2820   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
2821   b->free_a        = PETSC_TRUE;
2822   b->j         = uj;
2823   b->i         = ui;
2824   b->diag      = udiag;
2825   b->free_diag = PETSC_TRUE;
2826   b->ilen      = NULL;
2827   b->imax      = NULL;
2828   b->row       = perm;
2829   b->col       = perm;
2830 
2831   PetscCall(PetscObjectReference((PetscObject)perm));
2832   PetscCall(PetscObjectReference((PetscObject)perm));
2833 
2834   b->icol          = iperm;
2835   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2836   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2837   b->maxnz = b->nz = ui[am];
2838 
2839   fact->info.factor_mallocs   = reallocs;
2840   fact->info.fill_ratio_given = fill;
2841   if (ai[am] != 0) {
2842     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2843   } else {
2844     fact->info.fill_ratio_needed = 0.0;
2845   }
2846   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2847   PetscFunctionReturn(PETSC_SUCCESS);
2848 }
2849 #endif
2850 
2851 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2852 {
2853   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2854   Mat_SeqSBAIJ      *b;
2855   PetscBool          perm_identity, missing;
2856   PetscReal          fill = info->fill;
2857   const PetscInt    *rip, *riip;
2858   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2859   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2860   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2861   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2862   PetscBT            lnkbt;
2863   IS                 iperm;
2864 
2865   PetscFunctionBegin;
2866   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2867   PetscCall(MatMissingDiagonal(A, &missing, &i));
2868   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2869 
2870   /* check whether perm is the identity mapping */
2871   PetscCall(ISIdentity(perm, &perm_identity));
2872   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2873   PetscCall(ISGetIndices(iperm, &riip));
2874   PetscCall(ISGetIndices(perm, &rip));
2875 
2876   /* initialization */
2877   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2878   PetscCall(PetscMalloc1(am + 1, &udiag));
2879   ui[0] = 0;
2880 
2881   /* jl: linked list for storing indices of the pivot rows
2882      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2883   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2884   for (i = 0; i < am; i++) {
2885     jl[i] = am;
2886     il[i] = 0;
2887   }
2888 
2889   /* create and initialize a linked list for storing column indices of the active row k */
2890   nlnk = am + 1;
2891   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2892 
2893   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2894   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2895   current_space = free_space;
2896 
2897   for (k = 0; k < am; k++) { /* for each active row k */
2898     /* initialize lnk by the column indices of row rip[k] of A */
2899     nzk   = 0;
2900     ncols = ai[rip[k] + 1] - ai[rip[k]];
2901     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2902     ncols_upper = 0;
2903     for (j = 0; j < ncols; j++) {
2904       i = riip[*(aj + ai[rip[k]] + j)];
2905       if (i >= k) { /* only take upper triangular entry */
2906         cols[ncols_upper] = i;
2907         ncols_upper++;
2908       }
2909     }
2910     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2911     nzk += nlnk;
2912 
2913     /* update lnk by computing fill-in for each pivot row to be merged in */
2914     prow = jl[k]; /* 1st pivot row */
2915 
2916     while (prow < k) {
2917       nextprow = jl[prow];
2918       /* merge prow into k-th row */
2919       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2920       jmax   = ui[prow + 1];
2921       ncols  = jmax - jmin;
2922       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2923       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2924       nzk += nlnk;
2925 
2926       /* update il and jl for prow */
2927       if (jmin < jmax) {
2928         il[prow] = jmin;
2929         j        = *uj_ptr;
2930         jl[prow] = jl[j];
2931         jl[j]    = prow;
2932       }
2933       prow = nextprow;
2934     }
2935 
2936     /* if free space is not available, make more free space */
2937     if (current_space->local_remaining < nzk) {
2938       i = am - k + 1;                                    /* num of unfactored rows */
2939       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2940       PetscCall(PetscFreeSpaceGet(i, &current_space));
2941       reallocs++;
2942     }
2943 
2944     /* copy data into free space, then initialize lnk */
2945     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2946 
2947     /* add the k-th row into il and jl */
2948     if (nzk > 1) {
2949       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2950       jl[k] = jl[i];
2951       jl[i] = k;
2952       il[k] = ui[k] + 1;
2953     }
2954     ui_ptr[k] = current_space->array;
2955 
2956     current_space->array += nzk;
2957     current_space->local_used += nzk;
2958     current_space->local_remaining -= nzk;
2959 
2960     ui[k + 1] = ui[k] + nzk;
2961   }
2962 
2963   PetscCall(ISRestoreIndices(perm, &rip));
2964   PetscCall(ISRestoreIndices(iperm, &riip));
2965   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2966 
2967   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2968   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2969   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2970   PetscCall(PetscLLDestroy(lnk, lnkbt));
2971 
2972   /* put together the new matrix in MATSEQSBAIJ format */
2973   b          = (Mat_SeqSBAIJ *)fact->data;
2974   b->free_ij = PETSC_TRUE;
2975   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2976   b->free_a    = PETSC_TRUE;
2977   b->j         = uj;
2978   b->i         = ui;
2979   b->diag      = udiag;
2980   b->free_diag = PETSC_TRUE;
2981   b->ilen      = NULL;
2982   b->imax      = NULL;
2983   b->row       = perm;
2984   b->col       = perm;
2985 
2986   PetscCall(PetscObjectReference((PetscObject)perm));
2987   PetscCall(PetscObjectReference((PetscObject)perm));
2988 
2989   b->icol          = iperm;
2990   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2991 
2992   PetscCall(PetscMalloc1(am, &b->solve_work));
2993 
2994   b->maxnz = b->nz = ui[am];
2995 
2996   fact->info.factor_mallocs   = reallocs;
2997   fact->info.fill_ratio_given = fill;
2998   if (ai[am] != 0) {
2999     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
3000     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
3001   } else {
3002     fact->info.fill_ratio_needed = 0.0;
3003   }
3004 #if defined(PETSC_USE_INFO)
3005   if (ai[am] != 0) {
3006     PetscReal af = fact->info.fill_ratio_needed;
3007     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3008     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3009     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3010   } else {
3011     PetscCall(PetscInfo(A, "Empty matrix\n"));
3012   }
3013 #endif
3014   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3015   PetscFunctionReturn(PETSC_SUCCESS);
3016 }
3017 
3018 #if 0
3019 // unused
3020 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
3021 {
3022   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3023   Mat_SeqSBAIJ      *b;
3024   PetscBool          perm_identity, missing;
3025   PetscReal          fill = info->fill;
3026   const PetscInt    *rip, *riip;
3027   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
3028   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
3029   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
3030   PetscFreeSpaceList free_space = NULL, current_space = NULL;
3031   PetscBT            lnkbt;
3032   IS                 iperm;
3033 
3034   PetscFunctionBegin;
3035   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
3036   PetscCall(MatMissingDiagonal(A, &missing, &i));
3037   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3038 
3039   /* check whether perm is the identity mapping */
3040   PetscCall(ISIdentity(perm, &perm_identity));
3041   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3042   PetscCall(ISGetIndices(iperm, &riip));
3043   PetscCall(ISGetIndices(perm, &rip));
3044 
3045   /* initialization */
3046   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
3047   ui[0] = 0;
3048 
3049   /* jl: linked list for storing indices of the pivot rows
3050      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3051   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3052   for (i = 0; i < am; i++) {
3053     jl[i] = am;
3054     il[i] = 0;
3055   }
3056 
3057   /* create and initialize a linked list for storing column indices of the active row k */
3058   nlnk = am + 1;
3059   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
3060 
3061   /* initial FreeSpace size is fill*(ai[am]+1) */
3062   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3063   current_space = free_space;
3064 
3065   for (k = 0; k < am; k++) { /* for each active row k */
3066     /* initialize lnk by the column indices of row rip[k] of A */
3067     nzk   = 0;
3068     ncols = ai[rip[k] + 1] - ai[rip[k]];
3069     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3070     ncols_upper = 0;
3071     for (j = 0; j < ncols; j++) {
3072       i = riip[*(aj + ai[rip[k]] + j)];
3073       if (i >= k) { /* only take upper triangular entry */
3074         cols[ncols_upper] = i;
3075         ncols_upper++;
3076       }
3077     }
3078     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3079     nzk += nlnk;
3080 
3081     /* update lnk by computing fill-in for each pivot row to be merged in */
3082     prow = jl[k]; /* 1st pivot row */
3083 
3084     while (prow < k) {
3085       nextprow = jl[prow];
3086       /* merge prow into k-th row */
3087       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3088       jmax   = ui[prow + 1];
3089       ncols  = jmax - jmin;
3090       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3091       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3092       nzk += nlnk;
3093 
3094       /* update il and jl for prow */
3095       if (jmin < jmax) {
3096         il[prow] = jmin;
3097         j        = *uj_ptr;
3098         jl[prow] = jl[j];
3099         jl[j]    = prow;
3100       }
3101       prow = nextprow;
3102     }
3103 
3104     /* if free space is not available, make more free space */
3105     if (current_space->local_remaining < nzk) {
3106       i = am - k + 1;                     /* num of unfactored rows */
3107       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3108       PetscCall(PetscFreeSpaceGet(i, &current_space));
3109       reallocs++;
3110     }
3111 
3112     /* copy data into free space, then initialize lnk */
3113     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3114 
3115     /* add the k-th row into il and jl */
3116     if (nzk - 1 > 0) {
3117       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3118       jl[k] = jl[i];
3119       jl[i] = k;
3120       il[k] = ui[k] + 1;
3121     }
3122     ui_ptr[k] = current_space->array;
3123 
3124     current_space->array += nzk;
3125     current_space->local_used += nzk;
3126     current_space->local_remaining -= nzk;
3127 
3128     ui[k + 1] = ui[k] + nzk;
3129   }
3130 
3131   #if defined(PETSC_USE_INFO)
3132   if (ai[am] != 0) {
3133     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3134     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3135     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3136     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3137   } else {
3138     PetscCall(PetscInfo(A, "Empty matrix\n"));
3139   }
3140   #endif
3141 
3142   PetscCall(ISRestoreIndices(perm, &rip));
3143   PetscCall(ISRestoreIndices(iperm, &riip));
3144   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3145 
3146   /* destroy list of free space and other temporary array(s) */
3147   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
3148   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3149   PetscCall(PetscLLDestroy(lnk, lnkbt));
3150 
3151   /* put together the new matrix in MATSEQSBAIJ format */
3152   b               = (Mat_SeqSBAIJ *)fact->data;
3153   b->free_ij      = PETSC_TRUE;
3154   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
3155   b->free_a       = PETSC_TRUE;
3156   b->j    = uj;
3157   b->i    = ui;
3158   b->diag = NULL;
3159   b->ilen = NULL;
3160   b->imax = NULL;
3161   b->row  = perm;
3162   b->col  = perm;
3163 
3164   PetscCall(PetscObjectReference((PetscObject)perm));
3165   PetscCall(PetscObjectReference((PetscObject)perm));
3166 
3167   b->icol          = iperm;
3168   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3169 
3170   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3171   b->maxnz = b->nz = ui[am];
3172 
3173   fact->info.factor_mallocs   = reallocs;
3174   fact->info.fill_ratio_given = fill;
3175   if (ai[am] != 0) {
3176     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3177   } else {
3178     fact->info.fill_ratio_needed = 0.0;
3179   }
3180   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3181   PetscFunctionReturn(PETSC_SUCCESS);
3182 }
3183 #endif
3184 
3185 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3186 {
3187   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3188   PetscInt           n  = A->rmap->n;
3189   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3190   PetscScalar       *x, sum;
3191   const PetscScalar *b;
3192   const MatScalar   *aa, *v;
3193   PetscInt           i, nz;
3194 
3195   PetscFunctionBegin;
3196   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3197 
3198   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
3199   PetscCall(VecGetArrayRead(bb, &b));
3200   PetscCall(VecGetArrayWrite(xx, &x));
3201 
3202   /* forward solve the lower triangular */
3203   x[0] = b[0];
3204   v    = aa;
3205   vi   = aj;
3206   for (i = 1; i < n; i++) {
3207     nz  = ai[i + 1] - ai[i];
3208     sum = b[i];
3209     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3210     v += nz;
3211     vi += nz;
3212     x[i] = sum;
3213   }
3214 
3215   /* backward solve the upper triangular */
3216   for (i = n - 1; i >= 0; i--) {
3217     v   = aa + adiag[i + 1] + 1;
3218     vi  = aj + adiag[i + 1] + 1;
3219     nz  = adiag[i] - adiag[i + 1] - 1;
3220     sum = x[i];
3221     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3222     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3223   }
3224 
3225   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3226   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
3227   PetscCall(VecRestoreArrayRead(bb, &b));
3228   PetscCall(VecRestoreArrayWrite(xx, &x));
3229   PetscFunctionReturn(PETSC_SUCCESS);
3230 }
3231 
3232 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3233 {
3234   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3235   IS                 iscol = a->col, isrow = a->row;
3236   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3237   const PetscInt    *rout, *cout, *r, *c;
3238   PetscScalar       *x, *tmp, sum;
3239   const PetscScalar *b;
3240   const MatScalar   *aa, *v;
3241 
3242   PetscFunctionBegin;
3243   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3244 
3245   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
3246   PetscCall(VecGetArrayRead(bb, &b));
3247   PetscCall(VecGetArrayWrite(xx, &x));
3248   tmp = a->solve_work;
3249 
3250   PetscCall(ISGetIndices(isrow, &rout));
3251   r = rout;
3252   PetscCall(ISGetIndices(iscol, &cout));
3253   c = cout;
3254 
3255   /* forward solve the lower triangular */
3256   tmp[0] = b[r[0]];
3257   v      = aa;
3258   vi     = aj;
3259   for (i = 1; i < n; i++) {
3260     nz  = ai[i + 1] - ai[i];
3261     sum = b[r[i]];
3262     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3263     tmp[i] = sum;
3264     v += nz;
3265     vi += nz;
3266   }
3267 
3268   /* backward solve the upper triangular */
3269   for (i = n - 1; i >= 0; i--) {
3270     v   = aa + adiag[i + 1] + 1;
3271     vi  = aj + adiag[i + 1] + 1;
3272     nz  = adiag[i] - adiag[i + 1] - 1;
3273     sum = tmp[i];
3274     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3275     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3276   }
3277 
3278   PetscCall(ISRestoreIndices(isrow, &rout));
3279   PetscCall(ISRestoreIndices(iscol, &cout));
3280   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
3281   PetscCall(VecRestoreArrayRead(bb, &b));
3282   PetscCall(VecRestoreArrayWrite(xx, &x));
3283   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3284   PetscFunctionReturn(PETSC_SUCCESS);
3285 }
3286 
3287 #if 0
3288 // unused
3289 /*
3290     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3291 */
3292 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3293 {
3294   Mat             B = *fact;
3295   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3296   IS              isicol;
3297   const PetscInt *r, *ic;
3298   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3299   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3300   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3301   PetscInt        nlnk, *lnk;
3302   PetscBT         lnkbt;
3303   PetscBool       row_identity, icol_identity;
3304   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3305   const PetscInt *ics;
3306   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3307   PetscReal       dt = info->dt, shift = info->shiftamount;
3308   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3309   PetscBool       missing;
3310 
3311   PetscFunctionBegin;
3312   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3313   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3314 
3315   /* symbolic factorization, can be reused */
3316   PetscCall(MatMissingDiagonal(A, &missing, &i));
3317   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3318   adiag = a->diag;
3319 
3320   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3321 
3322   /* bdiag is location of diagonal in factor */
3323   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3324   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3325 
3326   /* allocate row pointers bi */
3327   PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));
3328 
3329   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3330   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3331   nnz_max = ai[n] + 2 * n * dtcount + 2;
3332 
3333   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
3334   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));
3335 
3336   /* put together the new matrix */
3337   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3338   b = (Mat_SeqAIJ *)B->data;
3339   b->free_a       = PETSC_TRUE;
3340   b->free_ij      = PETSC_TRUE;
3341   b->a    = ba;
3342   b->j    = bj;
3343   b->i    = bi;
3344   b->diag = bdiag;
3345   b->ilen = NULL;
3346   b->imax = NULL;
3347   b->row  = isrow;
3348   b->col  = iscol;
3349   PetscCall(PetscObjectReference((PetscObject)isrow));
3350   PetscCall(PetscObjectReference((PetscObject)iscol));
3351   b->icol = isicol;
3352 
3353   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3354   b->maxnz = nnz_max;
3355 
3356   B->factortype            = MAT_FACTOR_ILUDT;
3357   B->info.factor_mallocs   = 0;
3358   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3359   /*  end of symbolic factorization */
3360 
3361   PetscCall(ISGetIndices(isrow, &r));
3362   PetscCall(ISGetIndices(isicol, &ic));
3363   ics = ic;
3364 
3365   /* linked list for storing column indices of the active row */
3366   nlnk = n + 1;
3367   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3368 
3369   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3370   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3371   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3372   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3373   PetscCall(PetscArrayzero(rtmp, n));
3374 
3375   bi[0]         = 0;
3376   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3377   bdiag_rev[n]  = bdiag[0];
3378   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3379   for (i = 0; i < n; i++) {
3380     /* copy initial fill into linked list */
3381     nzi = ai[r[i] + 1] - ai[r[i]];
3382     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3383     nzi_al = adiag[r[i]] - ai[r[i]];
3384     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3385     ajtmp  = aj + ai[r[i]];
3386     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3387 
3388     /* load in initial (unfactored row) */
3389     aatmp = a->a + ai[r[i]];
3390     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3391 
3392     /* add pivot rows into linked list */
3393     row = lnk[n];
3394     while (row < i) {
3395       nzi_bl = bi[row + 1] - bi[row] + 1;
3396       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3397       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3398       nzi += nlnk;
3399       row = lnk[row];
3400     }
3401 
3402     /* copy data from lnk into jtmp, then initialize lnk */
3403     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3404 
3405     /* numerical factorization */
3406     bjtmp = jtmp;
3407     row   = *bjtmp++; /* 1st pivot row */
3408     while (row < i) {
3409       pc         = rtmp + row;
3410       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3411       multiplier = (*pc) * (*pv);
3412       *pc        = multiplier;
3413       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3414         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3415         pv = ba + bdiag[row + 1] + 1;
3416         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3417         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3418         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3419       }
3420       row = *bjtmp++;
3421     }
3422 
3423     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3424     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3425     nzi_bl   = 0;
3426     j        = 0;
3427     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3428       vtmp[j]       = rtmp[jtmp[j]];
3429       rtmp[jtmp[j]] = 0.0;
3430       nzi_bl++;
3431       j++;
3432     }
3433     nzi_bu = nzi - nzi_bl - 1;
3434     while (j < nzi) {
3435       vtmp[j]       = rtmp[jtmp[j]];
3436       rtmp[jtmp[j]] = 0.0;
3437       j++;
3438     }
3439 
3440     bjtmp = bj + bi[i];
3441     batmp = ba + bi[i];
3442     /* apply level dropping rule to L part */
3443     ncut = nzi_al + dtcount;
3444     if (ncut < nzi_bl) {
3445       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3446       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3447     } else {
3448       ncut = nzi_bl;
3449     }
3450     for (j = 0; j < ncut; j++) {
3451       bjtmp[j] = jtmp[j];
3452       batmp[j] = vtmp[j];
3453     }
3454     bi[i + 1] = bi[i] + ncut;
3455     nzi       = ncut + 1;
3456 
3457     /* apply level dropping rule to U part */
3458     ncut = nzi_au + dtcount;
3459     if (ncut < nzi_bu) {
3460       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3461       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3462     } else {
3463       ncut = nzi_bu;
3464     }
3465     nzi += ncut;
3466 
3467     /* mark bdiagonal */
3468     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3469     bdiag_rev[n - i - 1] = bdiag[i + 1];
3470     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3471     bjtmp                = bj + bdiag[i];
3472     batmp                = ba + bdiag[i];
3473     *bjtmp               = i;
3474     *batmp               = diag_tmp; /* rtmp[i]; */
3475     if (*batmp == 0.0) *batmp = dt + shift;
3476     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3477 
3478     bjtmp = bj + bdiag[i + 1] + 1;
3479     batmp = ba + bdiag[i + 1] + 1;
3480     for (k = 0; k < ncut; k++) {
3481       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3482       batmp[k] = vtmp[nzi_bl + 1 + k];
3483     }
3484 
3485     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3486   }              /* for (i=0; i<n; i++) */
3487   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3488 
3489   PetscCall(ISRestoreIndices(isrow, &r));
3490   PetscCall(ISRestoreIndices(isicol, &ic));
3491 
3492   PetscCall(PetscLLDestroy(lnk, lnkbt));
3493   PetscCall(PetscFree2(im, jtmp));
3494   PetscCall(PetscFree2(rtmp, vtmp));
3495   PetscCall(PetscFree(bdiag_rev));
3496 
3497   PetscCall(PetscLogFlops(B->cmap->n));
3498   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3499 
3500   PetscCall(ISIdentity(isrow, &row_identity));
3501   PetscCall(ISIdentity(isicol, &icol_identity));
3502   if (row_identity && icol_identity) {
3503     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3504   } else {
3505     B->ops->solve = MatSolve_SeqAIJ;
3506   }
3507 
3508   B->ops->solveadd          = NULL;
3509   B->ops->solvetranspose    = NULL;
3510   B->ops->solvetransposeadd = NULL;
3511   B->ops->matsolve          = NULL;
3512   B->assembled              = PETSC_TRUE;
3513   B->preallocated           = PETSC_TRUE;
3514   PetscFunctionReturn(PETSC_SUCCESS);
3515 }
3516 
3517 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3518 /*
3519     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3520 */
3521 
3522 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3523 {
3524   PetscFunctionBegin;
3525   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3526   PetscFunctionReturn(PETSC_SUCCESS);
3527 }
3528 
3529 /*
3530    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3531    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3532 */
3533 /*
3534     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3535 */
3536 
3537 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3538 {
3539   Mat             C = fact;
3540   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3541   IS              isrow = b->row, isicol = b->icol;
3542   const PetscInt *r, *ic, *ics;
3543   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3544   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3545   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3546   PetscReal       dt = info->dt, shift = info->shiftamount;
3547   PetscBool       row_identity, col_identity;
3548 
3549   PetscFunctionBegin;
3550   PetscCall(ISGetIndices(isrow, &r));
3551   PetscCall(ISGetIndices(isicol, &ic));
3552   PetscCall(PetscMalloc1(n + 1, &rtmp));
3553   ics = ic;
3554 
3555   for (i = 0; i < n; i++) {
3556     /* initialize rtmp array */
3557     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3558     bjtmp = bj + bi[i];
3559     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3560     rtmp[i] = 0.0;
3561     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3562     bjtmp   = bj + bdiag[i + 1] + 1;
3563     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3564 
3565     /* load in initial unfactored row of A */
3566     nz    = ai[r[i] + 1] - ai[r[i]];
3567     ajtmp = aj + ai[r[i]];
3568     v     = aa + ai[r[i]];
3569     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3570 
3571     /* numerical factorization */
3572     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3573     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3574     k     = 0;
3575     while (k < nzl) {
3576       row        = *bjtmp++;
3577       pc         = rtmp + row;
3578       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3579       multiplier = (*pc) * (*pv);
3580       *pc        = multiplier;
3581       if (PetscAbsScalar(multiplier) > dt) {
3582         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3583         pv = b->a + bdiag[row + 1] + 1;
3584         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3585         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3586         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3587       }
3588       k++;
3589     }
3590 
3591     /* finished row so stick it into b->a */
3592     /* L-part */
3593     pv  = b->a + bi[i];
3594     pj  = bj + bi[i];
3595     nzl = bi[i + 1] - bi[i];
3596     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3597 
3598     /* diagonal: invert diagonal entries for simpler triangular solves */
3599     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3600     b->a[bdiag[i]] = 1.0 / rtmp[i];
3601 
3602     /* U-part */
3603     pv  = b->a + bdiag[i + 1] + 1;
3604     pj  = bj + bdiag[i + 1] + 1;
3605     nzu = bdiag[i] - bdiag[i + 1] - 1;
3606     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3607   }
3608 
3609   PetscCall(PetscFree(rtmp));
3610   PetscCall(ISRestoreIndices(isicol, &ic));
3611   PetscCall(ISRestoreIndices(isrow, &r));
3612 
3613   PetscCall(ISIdentity(isrow, &row_identity));
3614   PetscCall(ISIdentity(isicol, &col_identity));
3615   if (row_identity && col_identity) {
3616     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3617   } else {
3618     C->ops->solve = MatSolve_SeqAIJ;
3619   }
3620   C->ops->solveadd          = NULL;
3621   C->ops->solvetranspose    = NULL;
3622   C->ops->solvetransposeadd = NULL;
3623   C->ops->matsolve          = NULL;
3624   C->assembled              = PETSC_TRUE;
3625   C->preallocated           = PETSC_TRUE;
3626 
3627   PetscCall(PetscLogFlops(C->cmap->n));
3628   PetscFunctionReturn(PETSC_SUCCESS);
3629 }
3630 #endif
3631