xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision e91c04dfc8a52dee1965211bb1cc8e5bf775178f)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
6 /*
7       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
8 
9       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10 */
11 #if 0
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
15   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal          best, past = 0, future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i = 0; i < n; i++) {
25     future = 0.0;
26     for (j = ai[i]; j < ai[i + 1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else past = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past / future > best) {
32       best    = past / future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n, &done));
38   PetscCall(PetscArrayzero(done, n));
39   PetscCall(PetscMalloc1(n, &order));
40   order[0] = current;
41   for (i = 0; i < n - 1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j = ai[current]; j < ai[current + 1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k = ai[jj]; k < ai[jj + 1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past / future > best) {
57         best       = past / future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k = 0; k < n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j = ai[k]; j < ai[k + 1]; j++) {
68           kk = aj[j];
69           if (done[kk]) past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past / future > best) {
74           best       = past / future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80     current      = newcurrent;
81     order[i + 1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #endif
91 
92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93 {
94   PetscFunctionBegin;
95   *type = MATSOLVERPETSC;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100 {
101   PetscInt n = A->rmap->n;
102 
103   PetscFunctionBegin;
104 #if defined(PETSC_USE_COMPLEX)
105   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107     *B = NULL;
108     PetscFunctionReturn(PETSC_SUCCESS);
109   }
110 #endif
111 
112   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113   PetscCall(MatSetSizes(*B, n, n, n, n));
114   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115     PetscCall(MatSetType(*B, MATSEQAIJ));
116 
117     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
119 
120     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
127 
128     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133   (*B)->factortype = ftype;
134 
135   PetscCall(PetscFree((*B)->solvertype));
136   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137   (*B)->canuseordering = PETSC_TRUE;
138   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 #if 0
143 // currently unused
144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145 {
146   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147   IS                 isicol;
148   const PetscInt    *r, *ic;
149   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150   PetscInt          *bi, *bj, *ajtmp;
151   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152   PetscReal          f;
153   PetscInt           nlnk, *lnk, k, **bi_ptr;
154   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155   PetscBT            lnkbt;
156   PetscBool          missing;
157 
158   PetscFunctionBegin;
159   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160   PetscCall(MatMissingDiagonal(A, &missing, &i));
161   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
162 
163   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164   PetscCall(ISGetIndices(isrow, &r));
165   PetscCall(ISGetIndices(isicol, &ic));
166 
167   /* get new row pointers */
168   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
169   bi[0] = 0;
170 
171   /* bdiag is location of diagonal in factor */
172   PetscCall(PetscMalloc1(n + 1, &bdiag));
173   bdiag[0] = 0;
174 
175   /* linked list for storing column indices of the active row */
176   nlnk = n + 1;
177   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
179 
180   /* initial FreeSpace size is f*(ai[n]+1) */
181   f = info->fill;
182   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
183   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
184   current_space = free_space;
185 
186   for (i = 0; i < n; i++) {
187     /* copy previous fill into linked list */
188     nzi   = 0;
189     nnz   = ai[r[i] + 1] - ai[r[i]];
190     ajtmp = aj + ai[r[i]];
191     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
192     nzi += nlnk;
193 
194     /* add pivot rows into linked list */
195     row = lnk[n];
196     while (row < i) {
197       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
199       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
200       nzi += nlnk;
201       row = lnk[row];
202     }
203     bi[i + 1] = bi[i] + nzi;
204     im[i]     = nzi;
205 
206     /* mark bdiag */
207     nzbd = 0;
208     nnz  = nzi;
209     k    = lnk[n];
210     while (nnz-- && k < i) {
211       nzbd++;
212       k = lnk[k];
213     }
214     bdiag[i] = bi[i] + nzbd;
215 
216     /* if free space is not available, make more free space */
217     if (current_space->local_remaining < nzi) {
218       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
219       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
220       reallocs++;
221     }
222 
223     /* copy data into free space, then initialize lnk */
224     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
225 
226     bi_ptr[i] = current_space->array;
227     current_space->array += nzi;
228     current_space->local_used += nzi;
229     current_space->local_remaining -= nzi;
230   }
231   #if defined(PETSC_USE_INFO)
232   if (ai[n] != 0) {
233     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
234     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
235     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
236     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
237     PetscCall(PetscInfo(A, "for best performance.\n"));
238   } else {
239     PetscCall(PetscInfo(A, "Empty matrix\n"));
240   }
241   #endif
242 
243   PetscCall(ISRestoreIndices(isrow, &r));
244   PetscCall(ISRestoreIndices(isicol, &ic));
245 
246   /* destroy list of free space and other temporary array(s) */
247   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
248   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
249   PetscCall(PetscLLDestroy(lnk, lnkbt));
250   PetscCall(PetscFree2(bi_ptr, im));
251 
252   /* put together the new matrix */
253   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
254   b = (Mat_SeqAIJ *)B->data;
255   b->free_ij      = PETSC_TRUE;
256   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
257   b->free_a       = PETSC_TRUE;
258   b->j    = bj;
259   b->i    = bi;
260   b->diag = bdiag;
261   b->ilen = NULL;
262   b->imax = NULL;
263   b->row  = isrow;
264   b->col  = iscol;
265   PetscCall(PetscObjectReference((PetscObject)isrow));
266   PetscCall(PetscObjectReference((PetscObject)iscol));
267   b->icol = isicol;
268   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
269 
270   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271   b->maxnz = b->nz = bi[n];
272 
273   B->factortype            = MAT_FACTOR_LU;
274   B->info.factor_mallocs   = reallocs;
275   B->info.fill_ratio_given = f;
276 
277   if (ai[n]) {
278     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279   } else {
280     B->info.fill_ratio_needed = 0.0;
281   }
282   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 #endif
287 
288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289 {
290   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
291   IS                 isicol;
292   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293   PetscInt           i, n = A->rmap->n;
294   PetscInt          *bi, *bj;
295   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296   PetscReal          f;
297   PetscInt           nlnk, *lnk, k, **bi_ptr;
298   PetscFreeSpaceList free_space = NULL, current_space = NULL;
299   PetscBT            lnkbt;
300   PetscBool          missing;
301 
302   PetscFunctionBegin;
303   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304   PetscCall(MatMissingDiagonal(A, &missing, &i));
305   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
306 
307   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308   PetscCall(ISGetIndices(isrow, &r));
309   PetscCall(ISGetIndices(isicol, &ic));
310 
311   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
313   PetscCall(PetscMalloc1(n + 1, &bdiag));
314   bi[0] = bdiag[0] = 0;
315 
316   /* linked list for storing column indices of the active row */
317   nlnk = n + 1;
318   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
319 
320   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
321 
322   /* initial FreeSpace size is f*(ai[n]+1) */
323   f = info->fill;
324   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326   current_space = free_space;
327 
328   for (i = 0; i < n; i++) {
329     /* copy previous fill into linked list */
330     nzi   = 0;
331     nnz   = ai[r[i] + 1] - ai[r[i]];
332     ajtmp = aj + ai[r[i]];
333     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334     nzi += nlnk;
335 
336     /* add pivot rows into linked list */
337     row = lnk[n];
338     while (row < i) {
339       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
340       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342       nzi += nlnk;
343       row = lnk[row];
344     }
345     bi[i + 1] = bi[i] + nzi;
346     im[i]     = nzi;
347 
348     /* mark bdiag */
349     nzbd = 0;
350     nnz  = nzi;
351     k    = lnk[n];
352     while (nnz-- && k < i) {
353       nzbd++;
354       k = lnk[k];
355     }
356     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357 
358     /* if free space is not available, make more free space */
359     if (current_space->local_remaining < nzi) {
360       /* estimated additional space needed */
361       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
363       reallocs++;
364     }
365 
366     /* copy data into free space, then initialize lnk */
367     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
368 
369     bi_ptr[i] = current_space->array;
370     current_space->array += nzi;
371     current_space->local_used += nzi;
372     current_space->local_remaining -= nzi;
373   }
374 
375   PetscCall(ISRestoreIndices(isrow, &r));
376   PetscCall(ISRestoreIndices(isicol, &ic));
377 
378   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
380   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381   PetscCall(PetscLLDestroy(lnk, lnkbt));
382   PetscCall(PetscFree2(bi_ptr, im));
383 
384   /* put together the new matrix */
385   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386   b          = (Mat_SeqAIJ *)B->data;
387   b->free_ij = PETSC_TRUE;
388   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
389   b->free_a = PETSC_TRUE;
390   b->j      = bj;
391   b->i      = bi;
392   b->diag   = bdiag;
393   b->ilen   = NULL;
394   b->imax   = NULL;
395   b->row    = isrow;
396   b->col    = iscol;
397   PetscCall(PetscObjectReference((PetscObject)isrow));
398   PetscCall(PetscObjectReference((PetscObject)iscol));
399   b->icol = isicol;
400   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
401 
402   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403   b->maxnz = b->nz = bdiag[0] + 1;
404 
405   B->factortype            = MAT_FACTOR_LU;
406   B->info.factor_mallocs   = reallocs;
407   B->info.fill_ratio_given = f;
408 
409   if (ai[n]) {
410     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
411   } else {
412     B->info.fill_ratio_needed = 0.0;
413   }
414 #if defined(PETSC_USE_INFO)
415   if (ai[n] != 0) {
416     PetscReal af = B->info.fill_ratio_needed;
417     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
418     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
419     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
420     PetscCall(PetscInfo(A, "for best performance.\n"));
421   } else {
422     PetscCall(PetscInfo(A, "Empty matrix\n"));
423   }
424 #endif
425   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
428   PetscFunctionReturn(PETSC_SUCCESS);
429 }
430 
431 /*
432     Trouble in factorization, should we dump the original matrix?
433 */
434 PetscErrorCode MatFactorDumpMatrix(Mat A)
435 {
436   PetscBool flg = PETSC_FALSE;
437 
438   PetscFunctionBegin;
439   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
440   if (flg) {
441     PetscViewer viewer;
442     char        filename[PETSC_MAX_PATH_LEN];
443 
444     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
445     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
446     PetscCall(MatView(A, viewer));
447     PetscCall(PetscViewerDestroy(&viewer));
448   }
449   PetscFunctionReturn(PETSC_SUCCESS);
450 }
451 
452 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453 {
454   Mat              C = B;
455   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456   IS               isrow = b->row, isicol = b->icol;
457   const PetscInt  *r, *ic, *ics;
458   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459   PetscInt         i, j, k, nz, nzL, row, *pj;
460   const PetscInt  *ajtmp, *bjtmp;
461   MatScalar       *rtmp, *pc, multiplier, *pv;
462   const MatScalar *aa = a->a, *v;
463   PetscBool        row_identity, col_identity;
464   FactorShiftCtx   sctx;
465   const PetscInt  *ddiag;
466   PetscReal        rs;
467   MatScalar        d;
468 
469   PetscFunctionBegin;
470   /* MatPivotSetUp(): initialize shift context sctx */
471   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
472 
473   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474     ddiag          = a->diag;
475     sctx.shift_top = info->zeropivot;
476     for (i = 0; i < n; i++) {
477       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478       d  = (aa)[ddiag[i]];
479       rs = -PetscAbsScalar(d) - PetscRealPart(d);
480       v  = aa + ai[i];
481       nz = ai[i + 1] - ai[i];
482       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
483       if (rs > sctx.shift_top) sctx.shift_top = rs;
484     }
485     sctx.shift_top *= 1.1;
486     sctx.nshift_max = 5;
487     sctx.shift_lo   = 0.;
488     sctx.shift_hi   = 1.;
489   }
490 
491   PetscCall(ISGetIndices(isrow, &r));
492   PetscCall(ISGetIndices(isicol, &ic));
493   PetscCall(PetscMalloc1(n + 1, &rtmp));
494   ics = ic;
495 
496   do {
497     sctx.newshift = PETSC_FALSE;
498     for (i = 0; i < n; i++) {
499       /* zero rtmp */
500       /* L part */
501       nz    = bi[i + 1] - bi[i];
502       bjtmp = bj + bi[i];
503       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
504 
505       /* U part */
506       nz    = bdiag[i] - bdiag[i + 1];
507       bjtmp = bj + bdiag[i + 1] + 1;
508       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
509 
510       /* load in initial (unfactored row) */
511       nz    = ai[r[i] + 1] - ai[r[i]];
512       ajtmp = aj + ai[r[i]];
513       v     = aa + ai[r[i]];
514       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
515       /* ZeropivotApply() */
516       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
517 
518       /* elimination */
519       bjtmp = bj + bi[i];
520       row   = *bjtmp++;
521       nzL   = bi[i + 1] - bi[i];
522       for (k = 0; k < nzL; k++) {
523         pc = rtmp + row;
524         if (*pc != 0.0) {
525           pv         = b->a + bdiag[row];
526           multiplier = *pc * (*pv);
527           *pc        = multiplier;
528 
529           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
530           pv = b->a + bdiag[row + 1] + 1;
531           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
532 
533           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
534           PetscCall(PetscLogFlops(1 + 2.0 * nz));
535         }
536         row = *bjtmp++;
537       }
538 
539       /* finished row so stick it into b->a */
540       rs = 0.0;
541       /* L part */
542       pv = b->a + bi[i];
543       pj = b->j + bi[i];
544       nz = bi[i + 1] - bi[i];
545       for (j = 0; j < nz; j++) {
546         pv[j] = rtmp[pj[j]];
547         rs += PetscAbsScalar(pv[j]);
548       }
549 
550       /* U part */
551       pv = b->a + bdiag[i + 1] + 1;
552       pj = b->j + bdiag[i + 1] + 1;
553       nz = bdiag[i] - bdiag[i + 1] - 1;
554       for (j = 0; j < nz; j++) {
555         pv[j] = rtmp[pj[j]];
556         rs += PetscAbsScalar(pv[j]);
557       }
558 
559       sctx.rs = rs;
560       sctx.pv = rtmp[i];
561       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
562       if (sctx.newshift) break; /* break for-loop */
563       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
564 
565       /* Mark diagonal and invert diagonal for simpler triangular solves */
566       pv  = b->a + bdiag[i];
567       *pv = 1.0 / rtmp[i];
568 
569     } /* endof for (i=0; i<n; i++) { */
570 
571     /* MatPivotRefine() */
572     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
573       /*
574        * if no shift in this attempt & shifting & started shifting & can refine,
575        * then try lower shift
576        */
577       sctx.shift_hi       = sctx.shift_fraction;
578       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
579       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
580       sctx.newshift       = PETSC_TRUE;
581       sctx.nshift++;
582     }
583   } while (sctx.newshift);
584 
585   PetscCall(PetscFree(rtmp));
586   PetscCall(ISRestoreIndices(isicol, &ic));
587   PetscCall(ISRestoreIndices(isrow, &r));
588 
589   PetscCall(ISIdentity(isrow, &row_identity));
590   PetscCall(ISIdentity(isicol, &col_identity));
591   if (b->inode.size) {
592     C->ops->solve = MatSolve_SeqAIJ_Inode;
593   } else if (row_identity && col_identity) {
594     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595   } else {
596     C->ops->solve = MatSolve_SeqAIJ;
597   }
598   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
599   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
600   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601   C->ops->matsolve          = MatMatSolve_SeqAIJ;
602   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
603   C->assembled              = PETSC_TRUE;
604   C->preallocated           = PETSC_TRUE;
605 
606   PetscCall(PetscLogFlops(C->cmap->n));
607 
608   /* MatShiftView(A,info,&sctx) */
609   if (sctx.nshift) {
610     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
611       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
612     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
613       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
614     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
615       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
616     }
617   }
618   PetscFunctionReturn(PETSC_SUCCESS);
619 }
620 
621 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
622 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
623 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
624 
625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
626 {
627   Mat              C = B;
628   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
629   IS               isrow = b->row, isicol = b->icol;
630   const PetscInt  *r, *ic, *ics;
631   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
632   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
633   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
634   MatScalar       *pv, *rtmp, *pc, multiplier, d;
635   const MatScalar *v, *aa = a->a;
636   PetscReal        rs = 0.0;
637   FactorShiftCtx   sctx;
638   const PetscInt  *ddiag;
639   PetscBool        row_identity, col_identity;
640 
641   PetscFunctionBegin;
642   /* MatPivotSetUp(): initialize shift context sctx */
643   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
644 
645   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
646     ddiag          = a->diag;
647     sctx.shift_top = info->zeropivot;
648     for (i = 0; i < n; i++) {
649       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
650       d  = (aa)[ddiag[i]];
651       rs = -PetscAbsScalar(d) - PetscRealPart(d);
652       v  = aa + ai[i];
653       nz = ai[i + 1] - ai[i];
654       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
655       if (rs > sctx.shift_top) sctx.shift_top = rs;
656     }
657     sctx.shift_top *= 1.1;
658     sctx.nshift_max = 5;
659     sctx.shift_lo   = 0.;
660     sctx.shift_hi   = 1.;
661   }
662 
663   PetscCall(ISGetIndices(isrow, &r));
664   PetscCall(ISGetIndices(isicol, &ic));
665   PetscCall(PetscMalloc1(n + 1, &rtmp));
666   ics = ic;
667 
668   do {
669     sctx.newshift = PETSC_FALSE;
670     for (i = 0; i < n; i++) {
671       nz    = bi[i + 1] - bi[i];
672       bjtmp = bj + bi[i];
673       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
674 
675       /* load in initial (unfactored row) */
676       nz    = ai[r[i] + 1] - ai[r[i]];
677       ajtmp = aj + ai[r[i]];
678       v     = aa + ai[r[i]];
679       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
680       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
681 
682       row = *bjtmp++;
683       while (row < i) {
684         pc = rtmp + row;
685         if (*pc != 0.0) {
686           pv         = b->a + diag_offset[row];
687           pj         = b->j + diag_offset[row] + 1;
688           multiplier = *pc / *pv++;
689           *pc        = multiplier;
690           nz         = bi[row + 1] - diag_offset[row] - 1;
691           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
692           PetscCall(PetscLogFlops(1 + 2.0 * nz));
693         }
694         row = *bjtmp++;
695       }
696       /* finished row so stick it into b->a */
697       pv   = b->a + bi[i];
698       pj   = b->j + bi[i];
699       nz   = bi[i + 1] - bi[i];
700       diag = diag_offset[i] - bi[i];
701       rs   = 0.0;
702       for (j = 0; j < nz; j++) {
703         pv[j] = rtmp[pj[j]];
704         rs += PetscAbsScalar(pv[j]);
705       }
706       rs -= PetscAbsScalar(pv[diag]);
707 
708       sctx.rs = rs;
709       sctx.pv = pv[diag];
710       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
711       if (sctx.newshift) break;
712       pv[diag] = sctx.pv;
713     }
714 
715     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
716       /*
717        * if no shift in this attempt & shifting & started shifting & can refine,
718        * then try lower shift
719        */
720       sctx.shift_hi       = sctx.shift_fraction;
721       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
722       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
723       sctx.newshift       = PETSC_TRUE;
724       sctx.nshift++;
725     }
726   } while (sctx.newshift);
727 
728   /* invert diagonal entries for simpler triangular solves */
729   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
730   PetscCall(PetscFree(rtmp));
731   PetscCall(ISRestoreIndices(isicol, &ic));
732   PetscCall(ISRestoreIndices(isrow, &r));
733 
734   PetscCall(ISIdentity(isrow, &row_identity));
735   PetscCall(ISIdentity(isicol, &col_identity));
736   if (row_identity && col_identity) {
737     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
738   } else {
739     C->ops->solve = MatSolve_SeqAIJ_inplace;
740   }
741   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
742   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
743   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
744   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
745   C->ops->matsolvetranspose = NULL;
746 
747   C->assembled    = PETSC_TRUE;
748   C->preallocated = PETSC_TRUE;
749 
750   PetscCall(PetscLogFlops(C->cmap->n));
751   if (sctx.nshift) {
752     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
753       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
754     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
755       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
756     }
757   }
758   C->ops->solve          = MatSolve_SeqAIJ_inplace;
759   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
760 
761   PetscCall(MatSeqAIJCheckInode(C));
762   PetscFunctionReturn(PETSC_SUCCESS);
763 }
764 
765 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
766 
767 /*
768    This routine implements inplace ILU(0) with row or/and column permutations.
769    Input:
770      A - original matrix
771    Output;
772      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
773          a->j (col index) is permuted by the inverse of colperm, then sorted
774          a->a reordered accordingly with a->j
775          a->diag (ptr to diagonal elements) is updated.
776 */
777 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
778 {
779   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
780   IS               isrow = a->row, isicol = a->icol;
781   const PetscInt  *r, *ic, *ics;
782   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
783   PetscInt        *ajtmp, nz, row;
784   PetscInt        *diag = a->diag, nbdiag, *pj;
785   PetscScalar     *rtmp, *pc, multiplier, d;
786   MatScalar       *pv, *v;
787   PetscReal        rs;
788   FactorShiftCtx   sctx;
789   const MatScalar *aa = a->a, *vtmp;
790 
791   PetscFunctionBegin;
792   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
793 
794   /* MatPivotSetUp(): initialize shift context sctx */
795   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
796 
797   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
798     const PetscInt *ddiag = a->diag;
799     sctx.shift_top        = info->zeropivot;
800     for (i = 0; i < n; i++) {
801       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
802       d    = (aa)[ddiag[i]];
803       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
804       vtmp = aa + ai[i];
805       nz   = ai[i + 1] - ai[i];
806       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
807       if (rs > sctx.shift_top) sctx.shift_top = rs;
808     }
809     sctx.shift_top *= 1.1;
810     sctx.nshift_max = 5;
811     sctx.shift_lo   = 0.;
812     sctx.shift_hi   = 1.;
813   }
814 
815   PetscCall(ISGetIndices(isrow, &r));
816   PetscCall(ISGetIndices(isicol, &ic));
817   PetscCall(PetscMalloc1(n + 1, &rtmp));
818   PetscCall(PetscArrayzero(rtmp, n + 1));
819   ics = ic;
820 
821 #if defined(MV)
822   sctx.shift_top      = 0.;
823   sctx.nshift_max     = 0;
824   sctx.shift_lo       = 0.;
825   sctx.shift_hi       = 0.;
826   sctx.shift_fraction = 0.;
827 
828   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
829     sctx.shift_top = 0.;
830     for (i = 0; i < n; i++) {
831       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
832       d  = (a->a)[diag[i]];
833       rs = -PetscAbsScalar(d) - PetscRealPart(d);
834       v  = a->a + ai[i];
835       nz = ai[i + 1] - ai[i];
836       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
837       if (rs > sctx.shift_top) sctx.shift_top = rs;
838     }
839     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
840     sctx.shift_top *= 1.1;
841     sctx.nshift_max = 5;
842     sctx.shift_lo   = 0.;
843     sctx.shift_hi   = 1.;
844   }
845 
846   sctx.shift_amount = 0.;
847   sctx.nshift       = 0;
848 #endif
849 
850   do {
851     sctx.newshift = PETSC_FALSE;
852     for (i = 0; i < n; i++) {
853       /* load in initial unfactored row */
854       nz    = ai[r[i] + 1] - ai[r[i]];
855       ajtmp = aj + ai[r[i]];
856       v     = a->a + ai[r[i]];
857       /* sort permuted ajtmp and values v accordingly */
858       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
859       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
860 
861       diag[r[i]] = ai[r[i]];
862       for (j = 0; j < nz; j++) {
863         rtmp[ajtmp[j]] = v[j];
864         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
865       }
866       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
867 
868       row = *ajtmp++;
869       while (row < i) {
870         pc = rtmp + row;
871         if (*pc != 0.0) {
872           pv = a->a + diag[r[row]];
873           pj = aj + diag[r[row]] + 1;
874 
875           multiplier = *pc / *pv++;
876           *pc        = multiplier;
877           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
878           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
879           PetscCall(PetscLogFlops(1 + 2.0 * nz));
880         }
881         row = *ajtmp++;
882       }
883       /* finished row so overwrite it onto a->a */
884       pv     = a->a + ai[r[i]];
885       pj     = aj + ai[r[i]];
886       nz     = ai[r[i] + 1] - ai[r[i]];
887       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
888 
889       rs = 0.0;
890       for (j = 0; j < nz; j++) {
891         pv[j] = rtmp[pj[j]];
892         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
893       }
894 
895       sctx.rs = rs;
896       sctx.pv = pv[nbdiag];
897       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
898       if (sctx.newshift) break;
899       pv[nbdiag] = sctx.pv;
900     }
901 
902     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
903       /*
904        * if no shift in this attempt & shifting & started shifting & can refine,
905        * then try lower shift
906        */
907       sctx.shift_hi       = sctx.shift_fraction;
908       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
909       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
910       sctx.newshift       = PETSC_TRUE;
911       sctx.nshift++;
912     }
913   } while (sctx.newshift);
914 
915   /* invert diagonal entries for simpler triangular solves */
916   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
917 
918   PetscCall(PetscFree(rtmp));
919   PetscCall(ISRestoreIndices(isicol, &ic));
920   PetscCall(ISRestoreIndices(isrow, &r));
921 
922   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
923   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
924   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
925   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
926 
927   A->assembled    = PETSC_TRUE;
928   A->preallocated = PETSC_TRUE;
929 
930   PetscCall(PetscLogFlops(A->cmap->n));
931   if (sctx.nshift) {
932     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
933       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
934     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
935       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
936     }
937   }
938   PetscFunctionReturn(PETSC_SUCCESS);
939 }
940 
941 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
942 {
943   Mat C;
944 
945   PetscFunctionBegin;
946   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
947   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
948   PetscCall(MatLUFactorNumeric(C, A, info));
949 
950   A->ops->solve          = C->ops->solve;
951   A->ops->solvetranspose = C->ops->solvetranspose;
952 
953   PetscCall(MatHeaderMerge(A, &C));
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
957 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
958 {
959   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
960   IS                 iscol = a->col, isrow = a->row;
961   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
962   PetscInt           nz;
963   const PetscInt    *rout, *cout, *r, *c;
964   PetscScalar       *x, *tmp, *tmps, sum;
965   const PetscScalar *b;
966   const MatScalar   *aa = a->a, *v;
967 
968   PetscFunctionBegin;
969   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
970 
971   PetscCall(VecGetArrayRead(bb, &b));
972   PetscCall(VecGetArrayWrite(xx, &x));
973   tmp = a->solve_work;
974 
975   PetscCall(ISGetIndices(isrow, &rout));
976   r = rout;
977   PetscCall(ISGetIndices(iscol, &cout));
978   c = cout + (n - 1);
979 
980   /* forward solve the lower triangular */
981   tmp[0] = b[*r++];
982   tmps   = tmp;
983   for (i = 1; i < n; i++) {
984     v   = aa + ai[i];
985     vi  = aj + ai[i];
986     nz  = a->diag[i] - ai[i];
987     sum = b[*r++];
988     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
989     tmp[i] = sum;
990   }
991 
992   /* backward solve the upper triangular */
993   for (i = n - 1; i >= 0; i--) {
994     v   = aa + a->diag[i] + 1;
995     vi  = aj + a->diag[i] + 1;
996     nz  = ai[i + 1] - a->diag[i] - 1;
997     sum = tmp[i];
998     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
999     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1000   }
1001 
1002   PetscCall(ISRestoreIndices(isrow, &rout));
1003   PetscCall(ISRestoreIndices(iscol, &cout));
1004   PetscCall(VecRestoreArrayRead(bb, &b));
1005   PetscCall(VecRestoreArrayWrite(xx, &x));
1006   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1007   PetscFunctionReturn(PETSC_SUCCESS);
1008 }
1009 
1010 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1011 {
1012   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1013   IS                 iscol = a->col, isrow = a->row;
1014   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1015   PetscInt           nz, neq, ldb, ldx;
1016   const PetscInt    *rout, *cout, *r, *c;
1017   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1018   const PetscScalar *b, *aa  = a->a, *v;
1019   PetscBool          isdense;
1020 
1021   PetscFunctionBegin;
1022   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1023   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1024   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1025   if (X != B) {
1026     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1027     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1028   }
1029   PetscCall(MatDenseGetArrayRead(B, &b));
1030   PetscCall(MatDenseGetLDA(B, &ldb));
1031   PetscCall(MatDenseGetArray(X, &x));
1032   PetscCall(MatDenseGetLDA(X, &ldx));
1033   PetscCall(ISGetIndices(isrow, &rout));
1034   r = rout;
1035   PetscCall(ISGetIndices(iscol, &cout));
1036   c = cout;
1037   for (neq = 0; neq < B->cmap->n; neq++) {
1038     /* forward solve the lower triangular */
1039     tmp[0] = b[r[0]];
1040     tmps   = tmp;
1041     for (i = 1; i < n; i++) {
1042       v   = aa + ai[i];
1043       vi  = aj + ai[i];
1044       nz  = a->diag[i] - ai[i];
1045       sum = b[r[i]];
1046       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1047       tmp[i] = sum;
1048     }
1049     /* backward solve the upper triangular */
1050     for (i = n - 1; i >= 0; i--) {
1051       v   = aa + a->diag[i] + 1;
1052       vi  = aj + a->diag[i] + 1;
1053       nz  = ai[i + 1] - a->diag[i] - 1;
1054       sum = tmp[i];
1055       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1056       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1057     }
1058     b += ldb;
1059     x += ldx;
1060   }
1061   PetscCall(ISRestoreIndices(isrow, &rout));
1062   PetscCall(ISRestoreIndices(iscol, &cout));
1063   PetscCall(MatDenseRestoreArrayRead(B, &b));
1064   PetscCall(MatDenseRestoreArray(X, &x));
1065   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1066   PetscFunctionReturn(PETSC_SUCCESS);
1067 }
1068 
1069 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1070 {
1071   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1072   IS                 iscol = a->col, isrow = a->row;
1073   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1074   PetscInt           nz, neq, ldb, ldx;
1075   const PetscInt    *rout, *cout, *r, *c;
1076   PetscScalar       *x, *tmp = a->solve_work, sum;
1077   const PetscScalar *b, *aa  = a->a, *v;
1078   PetscBool          isdense;
1079 
1080   PetscFunctionBegin;
1081   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1082   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1083   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1084   if (X != B) {
1085     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1086     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1087   }
1088   PetscCall(MatDenseGetArrayRead(B, &b));
1089   PetscCall(MatDenseGetLDA(B, &ldb));
1090   PetscCall(MatDenseGetArray(X, &x));
1091   PetscCall(MatDenseGetLDA(X, &ldx));
1092   PetscCall(ISGetIndices(isrow, &rout));
1093   r = rout;
1094   PetscCall(ISGetIndices(iscol, &cout));
1095   c = cout;
1096   for (neq = 0; neq < B->cmap->n; neq++) {
1097     /* forward solve the lower triangular */
1098     tmp[0] = b[r[0]];
1099     v      = aa;
1100     vi     = aj;
1101     for (i = 1; i < n; i++) {
1102       nz  = ai[i + 1] - ai[i];
1103       sum = b[r[i]];
1104       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1105       tmp[i] = sum;
1106       v += nz;
1107       vi += nz;
1108     }
1109     /* backward solve the upper triangular */
1110     for (i = n - 1; i >= 0; i--) {
1111       v   = aa + adiag[i + 1] + 1;
1112       vi  = aj + adiag[i + 1] + 1;
1113       nz  = adiag[i] - adiag[i + 1] - 1;
1114       sum = tmp[i];
1115       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1116       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1117     }
1118     b += ldb;
1119     x += ldx;
1120   }
1121   PetscCall(ISRestoreIndices(isrow, &rout));
1122   PetscCall(ISRestoreIndices(iscol, &cout));
1123   PetscCall(MatDenseRestoreArrayRead(B, &b));
1124   PetscCall(MatDenseRestoreArray(X, &x));
1125   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128 
1129 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1130 {
1131   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1132   IS                 iscol = a->col, isrow = a->row;
1133   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1134   PetscInt           nz, neq, ldb, ldx;
1135   const PetscInt    *rout, *cout, *r, *c;
1136   PetscScalar       *x, *tmp = a->solve_work, s1;
1137   const PetscScalar *b, *aa  = a->a, *v;
1138   PetscBool          isdense;
1139 
1140   PetscFunctionBegin;
1141   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1142   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1143   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1144   if (X != B) {
1145     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1146     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1147   }
1148   PetscCall(MatDenseGetArrayRead(B, &b));
1149   PetscCall(MatDenseGetLDA(B, &ldb));
1150   PetscCall(MatDenseGetArray(X, &x));
1151   PetscCall(MatDenseGetLDA(X, &ldx));
1152   PetscCall(ISGetIndices(isrow, &rout));
1153   r = rout;
1154   PetscCall(ISGetIndices(iscol, &cout));
1155   c = cout;
1156   for (neq = 0; neq < B->cmap->n; neq++) {
1157     /* copy the b into temp work space according to permutation */
1158     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1159 
1160     /* forward solve the U^T */
1161     for (i = 0; i < n; i++) {
1162       v  = aa + adiag[i + 1] + 1;
1163       vi = aj + adiag[i + 1] + 1;
1164       nz = adiag[i] - adiag[i + 1] - 1;
1165       s1 = tmp[i];
1166       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1167       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1168       tmp[i] = s1;
1169     }
1170 
1171     /* backward solve the L^T */
1172     for (i = n - 1; i >= 0; i--) {
1173       v  = aa + ai[i];
1174       vi = aj + ai[i];
1175       nz = ai[i + 1] - ai[i];
1176       s1 = tmp[i];
1177       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1178     }
1179 
1180     /* copy tmp into x according to permutation */
1181     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1182     b += ldb;
1183     x += ldx;
1184   }
1185   PetscCall(ISRestoreIndices(isrow, &rout));
1186   PetscCall(ISRestoreIndices(iscol, &cout));
1187   PetscCall(MatDenseRestoreArrayRead(B, &b));
1188   PetscCall(MatDenseRestoreArray(X, &x));
1189   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
1193 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1194 {
1195   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1196   IS                 iscol = a->col, isrow = a->row;
1197   const PetscInt    *r, *c, *rout, *cout;
1198   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1199   PetscInt           nz, row;
1200   PetscScalar       *x, *tmp, *tmps, sum;
1201   const PetscScalar *b;
1202   const MatScalar   *aa = a->a, *v;
1203 
1204   PetscFunctionBegin;
1205   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1206 
1207   PetscCall(VecGetArrayRead(bb, &b));
1208   PetscCall(VecGetArrayWrite(xx, &x));
1209   tmp = a->solve_work;
1210 
1211   PetscCall(ISGetIndices(isrow, &rout));
1212   r = rout;
1213   PetscCall(ISGetIndices(iscol, &cout));
1214   c = cout + (n - 1);
1215 
1216   /* forward solve the lower triangular */
1217   tmp[0] = b[*r++];
1218   tmps   = tmp;
1219   for (row = 1; row < n; row++) {
1220     i   = rout[row]; /* permuted row */
1221     v   = aa + ai[i];
1222     vi  = aj + ai[i];
1223     nz  = a->diag[i] - ai[i];
1224     sum = b[*r++];
1225     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1226     tmp[row] = sum;
1227   }
1228 
1229   /* backward solve the upper triangular */
1230   for (row = n - 1; row >= 0; row--) {
1231     i   = rout[row]; /* permuted row */
1232     v   = aa + a->diag[i] + 1;
1233     vi  = aj + a->diag[i] + 1;
1234     nz  = ai[i + 1] - a->diag[i] - 1;
1235     sum = tmp[row];
1236     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1237     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1238   }
1239 
1240   PetscCall(ISRestoreIndices(isrow, &rout));
1241   PetscCall(ISRestoreIndices(iscol, &cout));
1242   PetscCall(VecRestoreArrayRead(bb, &b));
1243   PetscCall(VecRestoreArrayWrite(xx, &x));
1244   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1245   PetscFunctionReturn(PETSC_SUCCESS);
1246 }
1247 
1248 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1249 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1250 {
1251   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1252   PetscInt           n  = A->rmap->n;
1253   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1254   PetscScalar       *x;
1255   const PetscScalar *b;
1256   const MatScalar   *aa = a->a;
1257 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1258   PetscInt         adiag_i, i, nz, ai_i;
1259   const PetscInt  *vi;
1260   const MatScalar *v;
1261   PetscScalar      sum;
1262 #endif
1263 
1264   PetscFunctionBegin;
1265   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1266 
1267   PetscCall(VecGetArrayRead(bb, &b));
1268   PetscCall(VecGetArrayWrite(xx, &x));
1269 
1270 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1271   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1272 #else
1273   /* forward solve the lower triangular */
1274   x[0] = b[0];
1275   for (i = 1; i < n; i++) {
1276     ai_i = ai[i];
1277     v    = aa + ai_i;
1278     vi   = aj + ai_i;
1279     nz   = adiag[i] - ai_i;
1280     sum  = b[i];
1281     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1282     x[i] = sum;
1283   }
1284 
1285   /* backward solve the upper triangular */
1286   for (i = n - 1; i >= 0; i--) {
1287     adiag_i = adiag[i];
1288     v       = aa + adiag_i + 1;
1289     vi      = aj + adiag_i + 1;
1290     nz      = ai[i + 1] - adiag_i - 1;
1291     sum     = x[i];
1292     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1293     x[i] = sum * aa[adiag_i];
1294   }
1295 #endif
1296   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1297   PetscCall(VecRestoreArrayRead(bb, &b));
1298   PetscCall(VecRestoreArrayWrite(xx, &x));
1299   PetscFunctionReturn(PETSC_SUCCESS);
1300 }
1301 
1302 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1303 {
1304   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1305   IS                 iscol = a->col, isrow = a->row;
1306   PetscInt           i, n                  = A->rmap->n, j;
1307   PetscInt           nz;
1308   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1309   PetscScalar       *x, *tmp, sum;
1310   const PetscScalar *b;
1311   const MatScalar   *aa = a->a, *v;
1312 
1313   PetscFunctionBegin;
1314   if (yy != xx) PetscCall(VecCopy(yy, xx));
1315 
1316   PetscCall(VecGetArrayRead(bb, &b));
1317   PetscCall(VecGetArray(xx, &x));
1318   tmp = a->solve_work;
1319 
1320   PetscCall(ISGetIndices(isrow, &rout));
1321   r = rout;
1322   PetscCall(ISGetIndices(iscol, &cout));
1323   c = cout + (n - 1);
1324 
1325   /* forward solve the lower triangular */
1326   tmp[0] = b[*r++];
1327   for (i = 1; i < n; i++) {
1328     v   = aa + ai[i];
1329     vi  = aj + ai[i];
1330     nz  = a->diag[i] - ai[i];
1331     sum = b[*r++];
1332     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1333     tmp[i] = sum;
1334   }
1335 
1336   /* backward solve the upper triangular */
1337   for (i = n - 1; i >= 0; i--) {
1338     v   = aa + a->diag[i] + 1;
1339     vi  = aj + a->diag[i] + 1;
1340     nz  = ai[i + 1] - a->diag[i] - 1;
1341     sum = tmp[i];
1342     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1343     tmp[i] = sum * aa[a->diag[i]];
1344     x[*c--] += tmp[i];
1345   }
1346 
1347   PetscCall(ISRestoreIndices(isrow, &rout));
1348   PetscCall(ISRestoreIndices(iscol, &cout));
1349   PetscCall(VecRestoreArrayRead(bb, &b));
1350   PetscCall(VecRestoreArray(xx, &x));
1351   PetscCall(PetscLogFlops(2.0 * a->nz));
1352   PetscFunctionReturn(PETSC_SUCCESS);
1353 }
1354 
1355 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1356 {
1357   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1358   IS                 iscol = a->col, isrow = a->row;
1359   PetscInt           i, n                  = A->rmap->n, j;
1360   PetscInt           nz;
1361   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1362   PetscScalar       *x, *tmp, sum;
1363   const PetscScalar *b;
1364   const MatScalar   *aa = a->a, *v;
1365 
1366   PetscFunctionBegin;
1367   if (yy != xx) PetscCall(VecCopy(yy, xx));
1368 
1369   PetscCall(VecGetArrayRead(bb, &b));
1370   PetscCall(VecGetArray(xx, &x));
1371   tmp = a->solve_work;
1372 
1373   PetscCall(ISGetIndices(isrow, &rout));
1374   r = rout;
1375   PetscCall(ISGetIndices(iscol, &cout));
1376   c = cout;
1377 
1378   /* forward solve the lower triangular */
1379   tmp[0] = b[r[0]];
1380   v      = aa;
1381   vi     = aj;
1382   for (i = 1; i < n; i++) {
1383     nz  = ai[i + 1] - ai[i];
1384     sum = b[r[i]];
1385     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1386     tmp[i] = sum;
1387     v += nz;
1388     vi += nz;
1389   }
1390 
1391   /* backward solve the upper triangular */
1392   v  = aa + adiag[n - 1];
1393   vi = aj + adiag[n - 1];
1394   for (i = n - 1; i >= 0; i--) {
1395     nz  = adiag[i] - adiag[i + 1] - 1;
1396     sum = tmp[i];
1397     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1398     tmp[i] = sum * v[nz];
1399     x[c[i]] += tmp[i];
1400     v += nz + 1;
1401     vi += nz + 1;
1402   }
1403 
1404   PetscCall(ISRestoreIndices(isrow, &rout));
1405   PetscCall(ISRestoreIndices(iscol, &cout));
1406   PetscCall(VecRestoreArrayRead(bb, &b));
1407   PetscCall(VecRestoreArray(xx, &x));
1408   PetscCall(PetscLogFlops(2.0 * a->nz));
1409   PetscFunctionReturn(PETSC_SUCCESS);
1410 }
1411 
1412 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1413 {
1414   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1415   IS                 iscol = a->col, isrow = a->row;
1416   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1417   PetscInt           i, n = A->rmap->n, j;
1418   PetscInt           nz;
1419   PetscScalar       *x, *tmp, s1;
1420   const MatScalar   *aa = a->a, *v;
1421   const PetscScalar *b;
1422 
1423   PetscFunctionBegin;
1424   PetscCall(VecGetArrayRead(bb, &b));
1425   PetscCall(VecGetArrayWrite(xx, &x));
1426   tmp = a->solve_work;
1427 
1428   PetscCall(ISGetIndices(isrow, &rout));
1429   r = rout;
1430   PetscCall(ISGetIndices(iscol, &cout));
1431   c = cout;
1432 
1433   /* copy the b into temp work space according to permutation */
1434   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1435 
1436   /* forward solve the U^T */
1437   for (i = 0; i < n; i++) {
1438     v  = aa + diag[i];
1439     vi = aj + diag[i] + 1;
1440     nz = ai[i + 1] - diag[i] - 1;
1441     s1 = tmp[i];
1442     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1443     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1444     tmp[i] = s1;
1445   }
1446 
1447   /* backward solve the L^T */
1448   for (i = n - 1; i >= 0; i--) {
1449     v  = aa + diag[i] - 1;
1450     vi = aj + diag[i] - 1;
1451     nz = diag[i] - ai[i];
1452     s1 = tmp[i];
1453     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1454   }
1455 
1456   /* copy tmp into x according to permutation */
1457   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1458 
1459   PetscCall(ISRestoreIndices(isrow, &rout));
1460   PetscCall(ISRestoreIndices(iscol, &cout));
1461   PetscCall(VecRestoreArrayRead(bb, &b));
1462   PetscCall(VecRestoreArrayWrite(xx, &x));
1463 
1464   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1465   PetscFunctionReturn(PETSC_SUCCESS);
1466 }
1467 
1468 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1469 {
1470   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1471   IS                 iscol = a->col, isrow = a->row;
1472   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1473   PetscInt           i, n = A->rmap->n, j;
1474   PetscInt           nz;
1475   PetscScalar       *x, *tmp, s1;
1476   const MatScalar   *aa = a->a, *v;
1477   const PetscScalar *b;
1478 
1479   PetscFunctionBegin;
1480   PetscCall(VecGetArrayRead(bb, &b));
1481   PetscCall(VecGetArrayWrite(xx, &x));
1482   tmp = a->solve_work;
1483 
1484   PetscCall(ISGetIndices(isrow, &rout));
1485   r = rout;
1486   PetscCall(ISGetIndices(iscol, &cout));
1487   c = cout;
1488 
1489   /* copy the b into temp work space according to permutation */
1490   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1491 
1492   /* forward solve the U^T */
1493   for (i = 0; i < n; i++) {
1494     v  = aa + adiag[i + 1] + 1;
1495     vi = aj + adiag[i + 1] + 1;
1496     nz = adiag[i] - adiag[i + 1] - 1;
1497     s1 = tmp[i];
1498     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1499     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1500     tmp[i] = s1;
1501   }
1502 
1503   /* backward solve the L^T */
1504   for (i = n - 1; i >= 0; i--) {
1505     v  = aa + ai[i];
1506     vi = aj + ai[i];
1507     nz = ai[i + 1] - ai[i];
1508     s1 = tmp[i];
1509     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1510   }
1511 
1512   /* copy tmp into x according to permutation */
1513   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1514 
1515   PetscCall(ISRestoreIndices(isrow, &rout));
1516   PetscCall(ISRestoreIndices(iscol, &cout));
1517   PetscCall(VecRestoreArrayRead(bb, &b));
1518   PetscCall(VecRestoreArrayWrite(xx, &x));
1519 
1520   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1521   PetscFunctionReturn(PETSC_SUCCESS);
1522 }
1523 
1524 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1525 {
1526   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1527   IS                 iscol = a->col, isrow = a->row;
1528   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1529   PetscInt           i, n = A->rmap->n, j;
1530   PetscInt           nz;
1531   PetscScalar       *x, *tmp, s1;
1532   const MatScalar   *aa = a->a, *v;
1533   const PetscScalar *b;
1534 
1535   PetscFunctionBegin;
1536   if (zz != xx) PetscCall(VecCopy(zz, xx));
1537   PetscCall(VecGetArrayRead(bb, &b));
1538   PetscCall(VecGetArray(xx, &x));
1539   tmp = a->solve_work;
1540 
1541   PetscCall(ISGetIndices(isrow, &rout));
1542   r = rout;
1543   PetscCall(ISGetIndices(iscol, &cout));
1544   c = cout;
1545 
1546   /* copy the b into temp work space according to permutation */
1547   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1548 
1549   /* forward solve the U^T */
1550   for (i = 0; i < n; i++) {
1551     v  = aa + diag[i];
1552     vi = aj + diag[i] + 1;
1553     nz = ai[i + 1] - diag[i] - 1;
1554     s1 = tmp[i];
1555     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1556     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1557     tmp[i] = s1;
1558   }
1559 
1560   /* backward solve the L^T */
1561   for (i = n - 1; i >= 0; i--) {
1562     v  = aa + diag[i] - 1;
1563     vi = aj + diag[i] - 1;
1564     nz = diag[i] - ai[i];
1565     s1 = tmp[i];
1566     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1567   }
1568 
1569   /* copy tmp into x according to permutation */
1570   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1571 
1572   PetscCall(ISRestoreIndices(isrow, &rout));
1573   PetscCall(ISRestoreIndices(iscol, &cout));
1574   PetscCall(VecRestoreArrayRead(bb, &b));
1575   PetscCall(VecRestoreArray(xx, &x));
1576 
1577   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1578   PetscFunctionReturn(PETSC_SUCCESS);
1579 }
1580 
1581 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1582 {
1583   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1584   IS                 iscol = a->col, isrow = a->row;
1585   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1586   PetscInt           i, n = A->rmap->n, j;
1587   PetscInt           nz;
1588   PetscScalar       *x, *tmp, s1;
1589   const MatScalar   *aa = a->a, *v;
1590   const PetscScalar *b;
1591 
1592   PetscFunctionBegin;
1593   if (zz != xx) PetscCall(VecCopy(zz, xx));
1594   PetscCall(VecGetArrayRead(bb, &b));
1595   PetscCall(VecGetArray(xx, &x));
1596   tmp = a->solve_work;
1597 
1598   PetscCall(ISGetIndices(isrow, &rout));
1599   r = rout;
1600   PetscCall(ISGetIndices(iscol, &cout));
1601   c = cout;
1602 
1603   /* copy the b into temp work space according to permutation */
1604   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1605 
1606   /* forward solve the U^T */
1607   for (i = 0; i < n; i++) {
1608     v  = aa + adiag[i + 1] + 1;
1609     vi = aj + adiag[i + 1] + 1;
1610     nz = adiag[i] - adiag[i + 1] - 1;
1611     s1 = tmp[i];
1612     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1613     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1614     tmp[i] = s1;
1615   }
1616 
1617   /* backward solve the L^T */
1618   for (i = n - 1; i >= 0; i--) {
1619     v  = aa + ai[i];
1620     vi = aj + ai[i];
1621     nz = ai[i + 1] - ai[i];
1622     s1 = tmp[i];
1623     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1624   }
1625 
1626   /* copy tmp into x according to permutation */
1627   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1628 
1629   PetscCall(ISRestoreIndices(isrow, &rout));
1630   PetscCall(ISRestoreIndices(iscol, &cout));
1631   PetscCall(VecRestoreArrayRead(bb, &b));
1632   PetscCall(VecRestoreArray(xx, &x));
1633 
1634   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1635   PetscFunctionReturn(PETSC_SUCCESS);
1636 }
1637 
1638 /*
1639    ilu() under revised new data structure.
1640    Factored arrays bj and ba are stored as
1641      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1642 
1643    bi=fact->i is an array of size n+1, in which
1644      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1645      bi[n]:  points to L(n-1,n-1)+1
1646 
1647   bdiag=fact->diag is an array of size n+1,in which
1648      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1649      bdiag[n]: points to entry of U(n-1,0)-1
1650 
1651    U(i,:) contains bdiag[i] as its last entry, i.e.,
1652     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1653 */
1654 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1655 {
1656   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1657   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1658   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1659   IS             isicol;
1660 
1661   PetscFunctionBegin;
1662   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1663   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1664   b = (Mat_SeqAIJ *)fact->data;
1665 
1666   /* allocate matrix arrays for new data structure */
1667   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a));
1668   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j));
1669   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1670   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1671   b->free_a  = PETSC_TRUE;
1672   b->free_ij = PETSC_TRUE;
1673 
1674   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1675   bdiag = b->diag;
1676 
1677   /* set bi and bj with new data structure */
1678   bi = b->i;
1679   bj = b->j;
1680 
1681   /* L part */
1682   bi[0] = 0;
1683   for (i = 0; i < n; i++) {
1684     nz        = adiag[i] - ai[i];
1685     bi[i + 1] = bi[i] + nz;
1686     aj        = a->j + ai[i];
1687     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1688   }
1689 
1690   /* U part */
1691   bdiag[n] = bi[n] - 1;
1692   for (i = n - 1; i >= 0; i--) {
1693     nz = ai[i + 1] - adiag[i] - 1;
1694     aj = a->j + adiag[i] + 1;
1695     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1696     /* diag[i] */
1697     bj[k++]  = i;
1698     bdiag[i] = bdiag[i + 1] + nz + 1;
1699   }
1700 
1701   fact->factortype             = MAT_FACTOR_ILU;
1702   fact->info.factor_mallocs    = 0;
1703   fact->info.fill_ratio_given  = info->fill;
1704   fact->info.fill_ratio_needed = 1.0;
1705   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1706   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1707 
1708   b       = (Mat_SeqAIJ *)fact->data;
1709   b->row  = isrow;
1710   b->col  = iscol;
1711   b->icol = isicol;
1712   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1713   PetscCall(PetscObjectReference((PetscObject)isrow));
1714   PetscCall(PetscObjectReference((PetscObject)iscol));
1715   PetscFunctionReturn(PETSC_SUCCESS);
1716 }
1717 
1718 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1719 {
1720   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1721   IS                 isicol;
1722   const PetscInt    *r, *ic;
1723   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1724   PetscInt          *bi, *cols, nnz, *cols_lvl;
1725   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1726   PetscInt           i, levels, diagonal_fill;
1727   PetscBool          col_identity, row_identity, missing;
1728   PetscReal          f;
1729   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1730   PetscBT            lnkbt;
1731   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1732   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1733   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1734 
1735   PetscFunctionBegin;
1736   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1737   PetscCall(MatMissingDiagonal(A, &missing, &i));
1738   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1739 
1740   levels = (PetscInt)info->levels;
1741   PetscCall(ISIdentity(isrow, &row_identity));
1742   PetscCall(ISIdentity(iscol, &col_identity));
1743   if (!levels && row_identity && col_identity) {
1744     /* special case: ilu(0) with natural ordering */
1745     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1746     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1747     PetscFunctionReturn(PETSC_SUCCESS);
1748   }
1749 
1750   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1751   PetscCall(ISGetIndices(isrow, &r));
1752   PetscCall(ISGetIndices(isicol, &ic));
1753 
1754   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1756   PetscCall(PetscMalloc1(n + 1, &bdiag));
1757   bi[0] = bdiag[0] = 0;
1758   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1759 
1760   /* create a linked list for storing column indices of the active row */
1761   nlnk = n + 1;
1762   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1763 
1764   /* initial FreeSpace size is f*(ai[n]+1) */
1765   f             = info->fill;
1766   diagonal_fill = (PetscInt)info->diagonal_fill;
1767   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1768   current_space = free_space;
1769   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1770   current_space_lvl = free_space_lvl;
1771   for (i = 0; i < n; i++) {
1772     nzi = 0;
1773     /* copy current row into linked list */
1774     nnz = ai[r[i] + 1] - ai[r[i]];
1775     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1776     cols   = aj + ai[r[i]];
1777     lnk[i] = -1; /* marker to indicate if diagonal exists */
1778     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1779     nzi += nlnk;
1780 
1781     /* make sure diagonal entry is included */
1782     if (diagonal_fill && lnk[i] == -1) {
1783       fm = n;
1784       while (lnk[fm] < i) fm = lnk[fm];
1785       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1786       lnk[fm]    = i;
1787       lnk_lvl[i] = 0;
1788       nzi++;
1789       dcount++;
1790     }
1791 
1792     /* add pivot rows into the active row */
1793     nzbd = 0;
1794     prow = lnk[n];
1795     while (prow < i) {
1796       nnz      = bdiag[prow];
1797       cols     = bj_ptr[prow] + nnz + 1;
1798       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1799       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1800       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1801       nzi += nlnk;
1802       prow = lnk[prow];
1803       nzbd++;
1804     }
1805     bdiag[i]  = nzbd;
1806     bi[i + 1] = bi[i] + nzi;
1807     /* if free space is not available, make more free space */
1808     if (current_space->local_remaining < nzi) {
1809       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1810       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1811       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1812       reallocs++;
1813     }
1814 
1815     /* copy data into free_space and free_space_lvl, then initialize lnk */
1816     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1817     bj_ptr[i]    = current_space->array;
1818     bjlvl_ptr[i] = current_space_lvl->array;
1819 
1820     /* make sure the active row i has diagonal entry */
1821     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1822 
1823     current_space->array += nzi;
1824     current_space->local_used += nzi;
1825     current_space->local_remaining -= nzi;
1826     current_space_lvl->array += nzi;
1827     current_space_lvl->local_used += nzi;
1828     current_space_lvl->local_remaining -= nzi;
1829   }
1830 
1831   PetscCall(ISRestoreIndices(isrow, &r));
1832   PetscCall(ISRestoreIndices(isicol, &ic));
1833   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1834   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
1835   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1836 
1837   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1838   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1839   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1840 
1841 #if defined(PETSC_USE_INFO)
1842   {
1843     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1844     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1845     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1846     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1847     PetscCall(PetscInfo(A, "for best performance.\n"));
1848     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1849   }
1850 #endif
1851   /* put together the new matrix */
1852   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1853   b          = (Mat_SeqAIJ *)fact->data;
1854   b->free_ij = PETSC_TRUE;
1855   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1856   b->free_a = PETSC_TRUE;
1857   b->j      = bj;
1858   b->i      = bi;
1859   b->diag   = bdiag;
1860   b->ilen   = NULL;
1861   b->imax   = NULL;
1862   b->row    = isrow;
1863   b->col    = iscol;
1864   PetscCall(PetscObjectReference((PetscObject)isrow));
1865   PetscCall(PetscObjectReference((PetscObject)iscol));
1866   b->icol = isicol;
1867 
1868   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1869   /* In b structure:  Free imax, ilen, old a, old j.
1870      Allocate bdiag, solve_work, new a, new j */
1871   b->maxnz = b->nz = bdiag[0] + 1;
1872 
1873   fact->info.factor_mallocs    = reallocs;
1874   fact->info.fill_ratio_given  = f;
1875   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1876   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1877   if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1878   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1879   PetscFunctionReturn(PETSC_SUCCESS);
1880 }
1881 
1882 #if 0
1883 // unused
1884 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1885 {
1886   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1887   IS                 isicol;
1888   const PetscInt    *r, *ic;
1889   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1890   PetscInt          *bi, *cols, nnz, *cols_lvl;
1891   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1892   PetscInt           i, levels, diagonal_fill;
1893   PetscBool          col_identity, row_identity;
1894   PetscReal          f;
1895   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1896   PetscBT            lnkbt;
1897   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1898   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1899   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1900   PetscBool          missing;
1901 
1902   PetscFunctionBegin;
1903   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1904   PetscCall(MatMissingDiagonal(A, &missing, &i));
1905   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1906 
1907   f             = info->fill;
1908   levels        = (PetscInt)info->levels;
1909   diagonal_fill = (PetscInt)info->diagonal_fill;
1910 
1911   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1912 
1913   PetscCall(ISIdentity(isrow, &row_identity));
1914   PetscCall(ISIdentity(iscol, &col_identity));
1915   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1916     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1917 
1918     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1919     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1920     fact->factortype               = MAT_FACTOR_ILU;
1921     (fact)->info.factor_mallocs    = 0;
1922     (fact)->info.fill_ratio_given  = info->fill;
1923     (fact)->info.fill_ratio_needed = 1.0;
1924 
1925     b       = (Mat_SeqAIJ *)fact->data;
1926     b->row  = isrow;
1927     b->col  = iscol;
1928     b->icol = isicol;
1929     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1930     PetscCall(PetscObjectReference((PetscObject)isrow));
1931     PetscCall(PetscObjectReference((PetscObject)iscol));
1932     PetscFunctionReturn(PETSC_SUCCESS);
1933   }
1934 
1935   PetscCall(ISGetIndices(isrow, &r));
1936   PetscCall(ISGetIndices(isicol, &ic));
1937 
1938   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1939   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1940   PetscCall(PetscMalloc1(n + 1, &bdiag));
1941   bi[0] = bdiag[0] = 0;
1942 
1943   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1944 
1945   /* create a linked list for storing column indices of the active row */
1946   nlnk = n + 1;
1947   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1948 
1949   /* initial FreeSpace size is f*(ai[n]+1) */
1950   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1951   current_space = free_space;
1952   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1953   current_space_lvl = free_space_lvl;
1954 
1955   for (i = 0; i < n; i++) {
1956     nzi = 0;
1957     /* copy current row into linked list */
1958     nnz = ai[r[i] + 1] - ai[r[i]];
1959     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1960     cols   = aj + ai[r[i]];
1961     lnk[i] = -1; /* marker to indicate if diagonal exists */
1962     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1963     nzi += nlnk;
1964 
1965     /* make sure diagonal entry is included */
1966     if (diagonal_fill && lnk[i] == -1) {
1967       fm = n;
1968       while (lnk[fm] < i) fm = lnk[fm];
1969       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1970       lnk[fm]    = i;
1971       lnk_lvl[i] = 0;
1972       nzi++;
1973       dcount++;
1974     }
1975 
1976     /* add pivot rows into the active row */
1977     nzbd = 0;
1978     prow = lnk[n];
1979     while (prow < i) {
1980       nnz      = bdiag[prow];
1981       cols     = bj_ptr[prow] + nnz + 1;
1982       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1983       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1984       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1985       nzi += nlnk;
1986       prow = lnk[prow];
1987       nzbd++;
1988     }
1989     bdiag[i]  = nzbd;
1990     bi[i + 1] = bi[i] + nzi;
1991 
1992     /* if free space is not available, make more free space */
1993     if (current_space->local_remaining < nzi) {
1994       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1995       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1996       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1997       reallocs++;
1998     }
1999 
2000     /* copy data into free_space and free_space_lvl, then initialize lnk */
2001     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2002     bj_ptr[i]    = current_space->array;
2003     bjlvl_ptr[i] = current_space_lvl->array;
2004 
2005     /* make sure the active row i has diagonal entry */
2006     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
2007 
2008     current_space->array += nzi;
2009     current_space->local_used += nzi;
2010     current_space->local_remaining -= nzi;
2011     current_space_lvl->array += nzi;
2012     current_space_lvl->local_used += nzi;
2013     current_space_lvl->local_remaining -= nzi;
2014   }
2015 
2016   PetscCall(ISRestoreIndices(isrow, &r));
2017   PetscCall(ISRestoreIndices(isicol, &ic));
2018 
2019   /* destroy list of free space and other temporary arrays */
2020   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2021   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2022   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2023   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2024   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
2025 
2026   #if defined(PETSC_USE_INFO)
2027   {
2028     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2029     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2030     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2031     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2032     PetscCall(PetscInfo(A, "for best performance.\n"));
2033     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2034   }
2035   #endif
2036 
2037   /* put together the new matrix */
2038   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2039   b = (Mat_SeqAIJ *)fact->data;
2040   b->free_ij      = PETSC_TRUE;
2041   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2042   b->free_a       = PETSC_TRUE;
2043   b->j = bj;
2044   b->i = bi;
2045   for (i = 0; i < n; i++) bdiag[i] += bi[i];
2046   b->diag = bdiag;
2047   b->ilen = NULL;
2048   b->imax = NULL;
2049   b->row  = isrow;
2050   b->col  = iscol;
2051   PetscCall(PetscObjectReference((PetscObject)isrow));
2052   PetscCall(PetscObjectReference((PetscObject)iscol));
2053   b->icol = isicol;
2054   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2055   /* In b structure:  Free imax, ilen, old a, old j.
2056      Allocate bdiag, solve_work, new a, new j */
2057   b->maxnz = b->nz = bi[n];
2058 
2059   fact->info.factor_mallocs    = reallocs;
2060   fact->info.fill_ratio_given  = f;
2061   fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2062   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2063   if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2064   PetscFunctionReturn(PETSC_SUCCESS);
2065 }
2066 #endif
2067 
2068 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2069 {
2070   Mat             C  = B;
2071   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2072   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2073   IS              ip = b->row, iip = b->icol;
2074   const PetscInt *rip, *riip;
2075   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2076   PetscInt       *ai = a->i, *aj = a->j;
2077   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2078   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2079   PetscBool       perm_identity;
2080   FactorShiftCtx  sctx;
2081   PetscReal       rs;
2082   MatScalar       d, *v;
2083 
2084   PetscFunctionBegin;
2085   /* MatPivotSetUp(): initialize shift context sctx */
2086   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2087 
2088   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2089     sctx.shift_top = info->zeropivot;
2090     for (i = 0; i < mbs; i++) {
2091       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2092       d  = (aa)[a->diag[i]];
2093       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2094       v  = aa + ai[i];
2095       nz = ai[i + 1] - ai[i];
2096       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2097       if (rs > sctx.shift_top) sctx.shift_top = rs;
2098     }
2099     sctx.shift_top *= 1.1;
2100     sctx.nshift_max = 5;
2101     sctx.shift_lo   = 0.;
2102     sctx.shift_hi   = 1.;
2103   }
2104 
2105   PetscCall(ISGetIndices(ip, &rip));
2106   PetscCall(ISGetIndices(iip, &riip));
2107 
2108   /* allocate working arrays
2109      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2110      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2111   */
2112   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2113 
2114   do {
2115     sctx.newshift = PETSC_FALSE;
2116 
2117     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2118     if (mbs) il[0] = 0;
2119 
2120     for (k = 0; k < mbs; k++) {
2121       /* zero rtmp */
2122       nz    = bi[k + 1] - bi[k];
2123       bjtmp = bj + bi[k];
2124       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2125 
2126       /* load in initial unfactored row */
2127       bval = ba + bi[k];
2128       jmin = ai[rip[k]];
2129       jmax = ai[rip[k] + 1];
2130       for (j = jmin; j < jmax; j++) {
2131         col = riip[aj[j]];
2132         if (col >= k) { /* only take upper triangular entry */
2133           rtmp[col] = aa[j];
2134           *bval++   = 0.0; /* for in-place factorization */
2135         }
2136       }
2137       /* shift the diagonal of the matrix: ZeropivotApply() */
2138       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2139 
2140       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2141       dk = rtmp[k];
2142       i  = c2r[k]; /* first row to be added to k_th row  */
2143 
2144       while (i < k) {
2145         nexti = c2r[i]; /* next row to be added to k_th row */
2146 
2147         /* compute multiplier, update diag(k) and U(i,k) */
2148         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2149         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2150         dk += uikdi * ba[ili];           /* update diag[k] */
2151         ba[ili] = uikdi;                 /* -U(i,k) */
2152 
2153         /* add multiple of row i to k-th row */
2154         jmin = ili + 1;
2155         jmax = bi[i + 1];
2156         if (jmin < jmax) {
2157           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2158           /* update il and c2r for row i */
2159           il[i]  = jmin;
2160           j      = bj[jmin];
2161           c2r[i] = c2r[j];
2162           c2r[j] = i;
2163         }
2164         i = nexti;
2165       }
2166 
2167       /* copy data into U(k,:) */
2168       rs   = 0.0;
2169       jmin = bi[k];
2170       jmax = bi[k + 1] - 1;
2171       if (jmin < jmax) {
2172         for (j = jmin; j < jmax; j++) {
2173           col   = bj[j];
2174           ba[j] = rtmp[col];
2175           rs += PetscAbsScalar(ba[j]);
2176         }
2177         /* add the k-th row into il and c2r */
2178         il[k]  = jmin;
2179         i      = bj[jmin];
2180         c2r[k] = c2r[i];
2181         c2r[i] = k;
2182       }
2183 
2184       /* MatPivotCheck() */
2185       sctx.rs = rs;
2186       sctx.pv = dk;
2187       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2188       if (sctx.newshift) break;
2189       dk = sctx.pv;
2190 
2191       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2192     }
2193   } while (sctx.newshift);
2194 
2195   PetscCall(PetscFree3(rtmp, il, c2r));
2196   PetscCall(ISRestoreIndices(ip, &rip));
2197   PetscCall(ISRestoreIndices(iip, &riip));
2198 
2199   PetscCall(ISIdentity(ip, &perm_identity));
2200   if (perm_identity) {
2201     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2202     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2203     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2204     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2205   } else {
2206     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2207     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2208     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2209     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2210   }
2211 
2212   C->assembled    = PETSC_TRUE;
2213   C->preallocated = PETSC_TRUE;
2214 
2215   PetscCall(PetscLogFlops(C->rmap->n));
2216 
2217   /* MatPivotView() */
2218   if (sctx.nshift) {
2219     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2220       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2221     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2222       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2223     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2224       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2225     }
2226   }
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
2230 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2231 {
2232   Mat             C  = B;
2233   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2234   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2235   IS              ip = b->row, iip = b->icol;
2236   const PetscInt *rip, *riip;
2237   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2238   PetscInt       *ai = a->i, *aj = a->j;
2239   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2240   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2241   PetscBool       perm_identity;
2242   FactorShiftCtx  sctx;
2243   PetscReal       rs;
2244   MatScalar       d, *v;
2245 
2246   PetscFunctionBegin;
2247   /* MatPivotSetUp(): initialize shift context sctx */
2248   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2249 
2250   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2251     sctx.shift_top = info->zeropivot;
2252     for (i = 0; i < mbs; i++) {
2253       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2254       d  = (aa)[a->diag[i]];
2255       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2256       v  = aa + ai[i];
2257       nz = ai[i + 1] - ai[i];
2258       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2259       if (rs > sctx.shift_top) sctx.shift_top = rs;
2260     }
2261     sctx.shift_top *= 1.1;
2262     sctx.nshift_max = 5;
2263     sctx.shift_lo   = 0.;
2264     sctx.shift_hi   = 1.;
2265   }
2266 
2267   PetscCall(ISGetIndices(ip, &rip));
2268   PetscCall(ISGetIndices(iip, &riip));
2269 
2270   /* initialization */
2271   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2272 
2273   do {
2274     sctx.newshift = PETSC_FALSE;
2275 
2276     for (i = 0; i < mbs; i++) jl[i] = mbs;
2277     il[0] = 0;
2278 
2279     for (k = 0; k < mbs; k++) {
2280       /* zero rtmp */
2281       nz    = bi[k + 1] - bi[k];
2282       bjtmp = bj + bi[k];
2283       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2284 
2285       bval = ba + bi[k];
2286       /* initialize k-th row by the perm[k]-th row of A */
2287       jmin = ai[rip[k]];
2288       jmax = ai[rip[k] + 1];
2289       for (j = jmin; j < jmax; j++) {
2290         col = riip[aj[j]];
2291         if (col >= k) { /* only take upper triangular entry */
2292           rtmp[col] = aa[j];
2293           *bval++   = 0.0; /* for in-place factorization */
2294         }
2295       }
2296       /* shift the diagonal of the matrix */
2297       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2298 
2299       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2300       dk = rtmp[k];
2301       i  = jl[k]; /* first row to be added to k_th row  */
2302 
2303       while (i < k) {
2304         nexti = jl[i]; /* next row to be added to k_th row */
2305 
2306         /* compute multiplier, update diag(k) and U(i,k) */
2307         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2308         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2309         dk += uikdi * ba[ili];
2310         ba[ili] = uikdi; /* -U(i,k) */
2311 
2312         /* add multiple of row i to k-th row */
2313         jmin = ili + 1;
2314         jmax = bi[i + 1];
2315         if (jmin < jmax) {
2316           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2317           /* update il and jl for row i */
2318           il[i] = jmin;
2319           j     = bj[jmin];
2320           jl[i] = jl[j];
2321           jl[j] = i;
2322         }
2323         i = nexti;
2324       }
2325 
2326       /* shift the diagonals when zero pivot is detected */
2327       /* compute rs=sum of abs(off-diagonal) */
2328       rs   = 0.0;
2329       jmin = bi[k] + 1;
2330       nz   = bi[k + 1] - jmin;
2331       bcol = bj + jmin;
2332       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2333 
2334       sctx.rs = rs;
2335       sctx.pv = dk;
2336       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2337       if (sctx.newshift) break;
2338       dk = sctx.pv;
2339 
2340       /* copy data into U(k,:) */
2341       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2342       jmin      = bi[k] + 1;
2343       jmax      = bi[k + 1];
2344       if (jmin < jmax) {
2345         for (j = jmin; j < jmax; j++) {
2346           col   = bj[j];
2347           ba[j] = rtmp[col];
2348         }
2349         /* add the k-th row into il and jl */
2350         il[k] = jmin;
2351         i     = bj[jmin];
2352         jl[k] = jl[i];
2353         jl[i] = k;
2354       }
2355     }
2356   } while (sctx.newshift);
2357 
2358   PetscCall(PetscFree3(rtmp, il, jl));
2359   PetscCall(ISRestoreIndices(ip, &rip));
2360   PetscCall(ISRestoreIndices(iip, &riip));
2361 
2362   PetscCall(ISIdentity(ip, &perm_identity));
2363   if (perm_identity) {
2364     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2365     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368   } else {
2369     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2370     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2371     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2372     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2373   }
2374 
2375   C->assembled    = PETSC_TRUE;
2376   C->preallocated = PETSC_TRUE;
2377 
2378   PetscCall(PetscLogFlops(C->rmap->n));
2379   if (sctx.nshift) {
2380     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2381       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2382     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2383       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2384     }
2385   }
2386   PetscFunctionReturn(PETSC_SUCCESS);
2387 }
2388 
2389 /*
2390    icc() under revised new data structure.
2391    Factored arrays bj and ba are stored as
2392      U(0,:),...,U(i,:),U(n-1,:)
2393 
2394    ui=fact->i is an array of size n+1, in which
2395    ui+
2396      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2397      ui[n]:  points to U(n-1,n-1)+1
2398 
2399   udiag=fact->diag is an array of size n,in which
2400      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2401 
2402    U(i,:) contains udiag[i] as its last entry, i.e.,
2403     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2404 */
2405 
2406 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2407 {
2408   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2409   Mat_SeqSBAIJ      *b;
2410   PetscBool          perm_identity, missing;
2411   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2412   const PetscInt    *rip, *riip;
2413   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2414   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2415   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2416   PetscReal          fill = info->fill, levels = info->levels;
2417   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2418   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2419   PetscBT            lnkbt;
2420   IS                 iperm;
2421 
2422   PetscFunctionBegin;
2423   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2424   PetscCall(MatMissingDiagonal(A, &missing, &d));
2425   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2426   PetscCall(ISIdentity(perm, &perm_identity));
2427   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2428 
2429   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2430   PetscCall(PetscMalloc1(am + 1, &udiag));
2431   ui[0] = 0;
2432 
2433   /* ICC(0) without matrix ordering: simply rearrange column indices */
2434   if (!levels && perm_identity) {
2435     for (i = 0; i < am; i++) {
2436       ncols     = ai[i + 1] - a->diag[i];
2437       ui[i + 1] = ui[i] + ncols;
2438       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2439     }
2440     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2441     cols = uj;
2442     for (i = 0; i < am; i++) {
2443       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2444       ncols = ai[i + 1] - a->diag[i] - 1;
2445       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2446       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2447     }
2448   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2449     PetscCall(ISGetIndices(iperm, &riip));
2450     PetscCall(ISGetIndices(perm, &rip));
2451 
2452     /* initialization */
2453     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2454 
2455     /* jl: linked list for storing indices of the pivot rows
2456        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2457     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2458     for (i = 0; i < am; i++) {
2459       jl[i] = am;
2460       il[i] = 0;
2461     }
2462 
2463     /* create and initialize a linked list for storing column indices of the active row k */
2464     nlnk = am + 1;
2465     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2466 
2467     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2468     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2469     current_space = free_space;
2470     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2471     current_space_lvl = free_space_lvl;
2472 
2473     for (k = 0; k < am; k++) { /* for each active row k */
2474       /* initialize lnk by the column indices of row rip[k] of A */
2475       nzk   = 0;
2476       ncols = ai[rip[k] + 1] - ai[rip[k]];
2477       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2478       ncols_upper = 0;
2479       for (j = 0; j < ncols; j++) {
2480         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2481         if (riip[i] >= k) {         /* only take upper triangular entry */
2482           ajtmp[ncols_upper] = i;
2483           ncols_upper++;
2484         }
2485       }
2486       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2487       nzk += nlnk;
2488 
2489       /* update lnk by computing fill-in for each pivot row to be merged in */
2490       prow = jl[k]; /* 1st pivot row */
2491 
2492       while (prow < k) {
2493         nextprow = jl[prow];
2494 
2495         /* merge prow into k-th row */
2496         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2497         jmax  = ui[prow + 1];
2498         ncols = jmax - jmin;
2499         i     = jmin - ui[prow];
2500         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2501         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2502         j     = *(uj - 1);
2503         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2504         nzk += nlnk;
2505 
2506         /* update il and jl for prow */
2507         if (jmin < jmax) {
2508           il[prow] = jmin;
2509           j        = *cols;
2510           jl[prow] = jl[j];
2511           jl[j]    = prow;
2512         }
2513         prow = nextprow;
2514       }
2515 
2516       /* if free space is not available, make more free space */
2517       if (current_space->local_remaining < nzk) {
2518         i = am - k + 1;                                    /* num of unfactored rows */
2519         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2520         PetscCall(PetscFreeSpaceGet(i, &current_space));
2521         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2522         reallocs++;
2523       }
2524 
2525       /* copy data into free_space and free_space_lvl, then initialize lnk */
2526       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2527       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2528 
2529       /* add the k-th row into il and jl */
2530       if (nzk > 1) {
2531         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2532         jl[k] = jl[i];
2533         jl[i] = k;
2534         il[k] = ui[k] + 1;
2535       }
2536       uj_ptr[k]     = current_space->array;
2537       uj_lvl_ptr[k] = current_space_lvl->array;
2538 
2539       current_space->array += nzk;
2540       current_space->local_used += nzk;
2541       current_space->local_remaining -= nzk;
2542 
2543       current_space_lvl->array += nzk;
2544       current_space_lvl->local_used += nzk;
2545       current_space_lvl->local_remaining -= nzk;
2546 
2547       ui[k + 1] = ui[k] + nzk;
2548     }
2549 
2550     PetscCall(ISRestoreIndices(perm, &rip));
2551     PetscCall(ISRestoreIndices(iperm, &riip));
2552     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2553     PetscCall(PetscFree(ajtmp));
2554 
2555     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2556     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2557     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2558     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2559     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2560 
2561   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2562 
2563   /* put together the new matrix in MATSEQSBAIJ format */
2564   b          = (Mat_SeqSBAIJ *)fact->data;
2565   b->free_ij = PETSC_TRUE;
2566   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2567   b->free_a    = PETSC_TRUE;
2568   b->j         = uj;
2569   b->i         = ui;
2570   b->diag      = udiag;
2571   b->free_diag = PETSC_TRUE;
2572   b->ilen      = NULL;
2573   b->imax      = NULL;
2574   b->row       = perm;
2575   b->col       = perm;
2576   PetscCall(PetscObjectReference((PetscObject)perm));
2577   PetscCall(PetscObjectReference((PetscObject)perm));
2578   b->icol          = iperm;
2579   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2580 
2581   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2582 
2583   b->maxnz = b->nz = ui[am];
2584 
2585   fact->info.factor_mallocs   = reallocs;
2586   fact->info.fill_ratio_given = fill;
2587   if (ai[am] != 0) {
2588     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2589     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2590   } else {
2591     fact->info.fill_ratio_needed = 0.0;
2592   }
2593 #if defined(PETSC_USE_INFO)
2594   if (ai[am] != 0) {
2595     PetscReal af = fact->info.fill_ratio_needed;
2596     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2597     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2598     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2599   } else {
2600     PetscCall(PetscInfo(A, "Empty matrix\n"));
2601   }
2602 #endif
2603   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2604   PetscFunctionReturn(PETSC_SUCCESS);
2605 }
2606 
2607 #if 0
2608 // unused
2609 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2610 {
2611   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2612   Mat_SeqSBAIJ      *b;
2613   PetscBool          perm_identity, missing;
2614   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2615   const PetscInt    *rip, *riip;
2616   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2617   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2618   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2619   PetscReal          fill = info->fill, levels = info->levels;
2620   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2621   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2622   PetscBT            lnkbt;
2623   IS                 iperm;
2624 
2625   PetscFunctionBegin;
2626   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2627   PetscCall(MatMissingDiagonal(A, &missing, &d));
2628   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2629   PetscCall(ISIdentity(perm, &perm_identity));
2630   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2631 
2632   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
2633   PetscCall(PetscMalloc1(am + 1, &udiag));
2634   ui[0] = 0;
2635 
2636   /* ICC(0) without matrix ordering: simply copies fill pattern */
2637   if (!levels && perm_identity) {
2638     for (i = 0; i < am; i++) {
2639       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2640       udiag[i]  = ui[i];
2641     }
2642     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2643     cols = uj;
2644     for (i = 0; i < am; i++) {
2645       aj    = a->j + a->diag[i];
2646       ncols = ui[i + 1] - ui[i];
2647       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2648     }
2649   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2650     PetscCall(ISGetIndices(iperm, &riip));
2651     PetscCall(ISGetIndices(perm, &rip));
2652 
2653     /* initialization */
2654     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2655 
2656     /* jl: linked list for storing indices of the pivot rows
2657        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2658     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2659     for (i = 0; i < am; i++) {
2660       jl[i] = am;
2661       il[i] = 0;
2662     }
2663 
2664     /* create and initialize a linked list for storing column indices of the active row k */
2665     nlnk = am + 1;
2666     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2667 
2668     /* initial FreeSpace size is fill*(ai[am]+1) */
2669     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2670     current_space = free_space;
2671     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2672     current_space_lvl = free_space_lvl;
2673 
2674     for (k = 0; k < am; k++) { /* for each active row k */
2675       /* initialize lnk by the column indices of row rip[k] of A */
2676       nzk   = 0;
2677       ncols = ai[rip[k] + 1] - ai[rip[k]];
2678       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2679       ncols_upper = 0;
2680       for (j = 0; j < ncols; j++) {
2681         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2682         if (riip[i] >= k) {         /* only take upper triangular entry */
2683           ajtmp[ncols_upper] = i;
2684           ncols_upper++;
2685         }
2686       }
2687       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2688       nzk += nlnk;
2689 
2690       /* update lnk by computing fill-in for each pivot row to be merged in */
2691       prow = jl[k]; /* 1st pivot row */
2692 
2693       while (prow < k) {
2694         nextprow = jl[prow];
2695 
2696         /* merge prow into k-th row */
2697         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2698         jmax  = ui[prow + 1];
2699         ncols = jmax - jmin;
2700         i     = jmin - ui[prow];
2701         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2702         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2703         j     = *(uj - 1);
2704         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2705         nzk += nlnk;
2706 
2707         /* update il and jl for prow */
2708         if (jmin < jmax) {
2709           il[prow] = jmin;
2710           j        = *cols;
2711           jl[prow] = jl[j];
2712           jl[j]    = prow;
2713         }
2714         prow = nextprow;
2715       }
2716 
2717       /* if free space is not available, make more free space */
2718       if (current_space->local_remaining < nzk) {
2719         i = am - k + 1;                                    /* num of unfactored rows */
2720         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2721         PetscCall(PetscFreeSpaceGet(i, &current_space));
2722         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2723         reallocs++;
2724       }
2725 
2726       /* copy data into free_space and free_space_lvl, then initialize lnk */
2727       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2728       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2729 
2730       /* add the k-th row into il and jl */
2731       if (nzk > 1) {
2732         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2733         jl[k] = jl[i];
2734         jl[i] = k;
2735         il[k] = ui[k] + 1;
2736       }
2737       uj_ptr[k]     = current_space->array;
2738       uj_lvl_ptr[k] = current_space_lvl->array;
2739 
2740       current_space->array += nzk;
2741       current_space->local_used += nzk;
2742       current_space->local_remaining -= nzk;
2743 
2744       current_space_lvl->array += nzk;
2745       current_space_lvl->local_used += nzk;
2746       current_space_lvl->local_remaining -= nzk;
2747 
2748       ui[k + 1] = ui[k] + nzk;
2749     }
2750 
2751   #if defined(PETSC_USE_INFO)
2752     if (ai[am] != 0) {
2753       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2754       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2755       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2756       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2757     } else {
2758       PetscCall(PetscInfo(A, "Empty matrix\n"));
2759     }
2760   #endif
2761 
2762     PetscCall(ISRestoreIndices(perm, &rip));
2763     PetscCall(ISRestoreIndices(iperm, &riip));
2764     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2765     PetscCall(PetscFree(ajtmp));
2766 
2767     /* destroy list of free space and other temporary array(s) */
2768     PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
2769     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2770     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2771     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2772 
2773   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2774 
2775   /* put together the new matrix in MATSEQSBAIJ format */
2776 
2777   b               = (Mat_SeqSBAIJ *)fact->data;
2778   b->free_ij       = PETSC_TRUE;
2779   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
2780   b->free_a        = PETSC_TRUE;
2781   b->j         = uj;
2782   b->i         = ui;
2783   b->diag      = udiag;
2784   b->free_diag = PETSC_TRUE;
2785   b->ilen      = NULL;
2786   b->imax      = NULL;
2787   b->row       = perm;
2788   b->col       = perm;
2789 
2790   PetscCall(PetscObjectReference((PetscObject)perm));
2791   PetscCall(PetscObjectReference((PetscObject)perm));
2792 
2793   b->icol          = iperm;
2794   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2795   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2796   b->maxnz = b->nz = ui[am];
2797 
2798   fact->info.factor_mallocs   = reallocs;
2799   fact->info.fill_ratio_given = fill;
2800   if (ai[am] != 0) {
2801     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2802   } else {
2803     fact->info.fill_ratio_needed = 0.0;
2804   }
2805   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2806   PetscFunctionReturn(PETSC_SUCCESS);
2807 }
2808 #endif
2809 
2810 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2811 {
2812   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2813   Mat_SeqSBAIJ      *b;
2814   PetscBool          perm_identity, missing;
2815   PetscReal          fill = info->fill;
2816   const PetscInt    *rip, *riip;
2817   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2818   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2819   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2820   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2821   PetscBT            lnkbt;
2822   IS                 iperm;
2823 
2824   PetscFunctionBegin;
2825   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2826   PetscCall(MatMissingDiagonal(A, &missing, &i));
2827   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2828 
2829   /* check whether perm is the identity mapping */
2830   PetscCall(ISIdentity(perm, &perm_identity));
2831   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2832   PetscCall(ISGetIndices(iperm, &riip));
2833   PetscCall(ISGetIndices(perm, &rip));
2834 
2835   /* initialization */
2836   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2837   PetscCall(PetscMalloc1(am + 1, &udiag));
2838   ui[0] = 0;
2839 
2840   /* jl: linked list for storing indices of the pivot rows
2841      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2842   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2843   for (i = 0; i < am; i++) {
2844     jl[i] = am;
2845     il[i] = 0;
2846   }
2847 
2848   /* create and initialize a linked list for storing column indices of the active row k */
2849   nlnk = am + 1;
2850   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2851 
2852   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2853   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2854   current_space = free_space;
2855 
2856   for (k = 0; k < am; k++) { /* for each active row k */
2857     /* initialize lnk by the column indices of row rip[k] of A */
2858     nzk   = 0;
2859     ncols = ai[rip[k] + 1] - ai[rip[k]];
2860     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2861     ncols_upper = 0;
2862     for (j = 0; j < ncols; j++) {
2863       i = riip[*(aj + ai[rip[k]] + j)];
2864       if (i >= k) { /* only take upper triangular entry */
2865         cols[ncols_upper] = i;
2866         ncols_upper++;
2867       }
2868     }
2869     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2870     nzk += nlnk;
2871 
2872     /* update lnk by computing fill-in for each pivot row to be merged in */
2873     prow = jl[k]; /* 1st pivot row */
2874 
2875     while (prow < k) {
2876       nextprow = jl[prow];
2877       /* merge prow into k-th row */
2878       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2879       jmax   = ui[prow + 1];
2880       ncols  = jmax - jmin;
2881       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2882       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2883       nzk += nlnk;
2884 
2885       /* update il and jl for prow */
2886       if (jmin < jmax) {
2887         il[prow] = jmin;
2888         j        = *uj_ptr;
2889         jl[prow] = jl[j];
2890         jl[j]    = prow;
2891       }
2892       prow = nextprow;
2893     }
2894 
2895     /* if free space is not available, make more free space */
2896     if (current_space->local_remaining < nzk) {
2897       i = am - k + 1;                                    /* num of unfactored rows */
2898       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2899       PetscCall(PetscFreeSpaceGet(i, &current_space));
2900       reallocs++;
2901     }
2902 
2903     /* copy data into free space, then initialize lnk */
2904     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2905 
2906     /* add the k-th row into il and jl */
2907     if (nzk > 1) {
2908       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2909       jl[k] = jl[i];
2910       jl[i] = k;
2911       il[k] = ui[k] + 1;
2912     }
2913     ui_ptr[k] = current_space->array;
2914 
2915     current_space->array += nzk;
2916     current_space->local_used += nzk;
2917     current_space->local_remaining -= nzk;
2918 
2919     ui[k + 1] = ui[k] + nzk;
2920   }
2921 
2922   PetscCall(ISRestoreIndices(perm, &rip));
2923   PetscCall(ISRestoreIndices(iperm, &riip));
2924   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2925 
2926   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2927   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2928   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2929   PetscCall(PetscLLDestroy(lnk, lnkbt));
2930 
2931   /* put together the new matrix in MATSEQSBAIJ format */
2932   b          = (Mat_SeqSBAIJ *)fact->data;
2933   b->free_ij = PETSC_TRUE;
2934   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2935   b->free_a    = PETSC_TRUE;
2936   b->j         = uj;
2937   b->i         = ui;
2938   b->diag      = udiag;
2939   b->free_diag = PETSC_TRUE;
2940   b->ilen      = NULL;
2941   b->imax      = NULL;
2942   b->row       = perm;
2943   b->col       = perm;
2944 
2945   PetscCall(PetscObjectReference((PetscObject)perm));
2946   PetscCall(PetscObjectReference((PetscObject)perm));
2947 
2948   b->icol          = iperm;
2949   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2950 
2951   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2952 
2953   b->maxnz = b->nz = ui[am];
2954 
2955   fact->info.factor_mallocs   = reallocs;
2956   fact->info.fill_ratio_given = fill;
2957   if (ai[am] != 0) {
2958     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2959     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2960   } else {
2961     fact->info.fill_ratio_needed = 0.0;
2962   }
2963 #if defined(PETSC_USE_INFO)
2964   if (ai[am] != 0) {
2965     PetscReal af = fact->info.fill_ratio_needed;
2966     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2967     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2968     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2969   } else {
2970     PetscCall(PetscInfo(A, "Empty matrix\n"));
2971   }
2972 #endif
2973   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2974   PetscFunctionReturn(PETSC_SUCCESS);
2975 }
2976 
2977 #if 0
2978 // unused
2979 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2980 {
2981   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2982   Mat_SeqSBAIJ      *b;
2983   PetscBool          perm_identity, missing;
2984   PetscReal          fill = info->fill;
2985   const PetscInt    *rip, *riip;
2986   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2987   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2988   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2989   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2990   PetscBT            lnkbt;
2991   IS                 iperm;
2992 
2993   PetscFunctionBegin;
2994   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2995   PetscCall(MatMissingDiagonal(A, &missing, &i));
2996   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2997 
2998   /* check whether perm is the identity mapping */
2999   PetscCall(ISIdentity(perm, &perm_identity));
3000   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3001   PetscCall(ISGetIndices(iperm, &riip));
3002   PetscCall(ISGetIndices(perm, &rip));
3003 
3004   /* initialization */
3005   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
3006   ui[0] = 0;
3007 
3008   /* jl: linked list for storing indices of the pivot rows
3009      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3010   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3011   for (i = 0; i < am; i++) {
3012     jl[i] = am;
3013     il[i] = 0;
3014   }
3015 
3016   /* create and initialize a linked list for storing column indices of the active row k */
3017   nlnk = am + 1;
3018   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
3019 
3020   /* initial FreeSpace size is fill*(ai[am]+1) */
3021   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3022   current_space = free_space;
3023 
3024   for (k = 0; k < am; k++) { /* for each active row k */
3025     /* initialize lnk by the column indices of row rip[k] of A */
3026     nzk   = 0;
3027     ncols = ai[rip[k] + 1] - ai[rip[k]];
3028     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3029     ncols_upper = 0;
3030     for (j = 0; j < ncols; j++) {
3031       i = riip[*(aj + ai[rip[k]] + j)];
3032       if (i >= k) { /* only take upper triangular entry */
3033         cols[ncols_upper] = i;
3034         ncols_upper++;
3035       }
3036     }
3037     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3038     nzk += nlnk;
3039 
3040     /* update lnk by computing fill-in for each pivot row to be merged in */
3041     prow = jl[k]; /* 1st pivot row */
3042 
3043     while (prow < k) {
3044       nextprow = jl[prow];
3045       /* merge prow into k-th row */
3046       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3047       jmax   = ui[prow + 1];
3048       ncols  = jmax - jmin;
3049       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3050       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3051       nzk += nlnk;
3052 
3053       /* update il and jl for prow */
3054       if (jmin < jmax) {
3055         il[prow] = jmin;
3056         j        = *uj_ptr;
3057         jl[prow] = jl[j];
3058         jl[j]    = prow;
3059       }
3060       prow = nextprow;
3061     }
3062 
3063     /* if free space is not available, make more free space */
3064     if (current_space->local_remaining < nzk) {
3065       i = am - k + 1;                     /* num of unfactored rows */
3066       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3067       PetscCall(PetscFreeSpaceGet(i, &current_space));
3068       reallocs++;
3069     }
3070 
3071     /* copy data into free space, then initialize lnk */
3072     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3073 
3074     /* add the k-th row into il and jl */
3075     if (nzk - 1 > 0) {
3076       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3077       jl[k] = jl[i];
3078       jl[i] = k;
3079       il[k] = ui[k] + 1;
3080     }
3081     ui_ptr[k] = current_space->array;
3082 
3083     current_space->array += nzk;
3084     current_space->local_used += nzk;
3085     current_space->local_remaining -= nzk;
3086 
3087     ui[k + 1] = ui[k] + nzk;
3088   }
3089 
3090   #if defined(PETSC_USE_INFO)
3091   if (ai[am] != 0) {
3092     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3093     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3094     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3095     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3096   } else {
3097     PetscCall(PetscInfo(A, "Empty matrix\n"));
3098   }
3099   #endif
3100 
3101   PetscCall(ISRestoreIndices(perm, &rip));
3102   PetscCall(ISRestoreIndices(iperm, &riip));
3103   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3104 
3105   /* destroy list of free space and other temporary array(s) */
3106   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
3107   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3108   PetscCall(PetscLLDestroy(lnk, lnkbt));
3109 
3110   /* put together the new matrix in MATSEQSBAIJ format */
3111   b               = (Mat_SeqSBAIJ *)fact->data;
3112   b->free_ij      = PETSC_TRUE;
3113   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
3114   b->free_a       = PETSC_TRUE;
3115   b->j    = uj;
3116   b->i    = ui;
3117   b->diag = NULL;
3118   b->ilen = NULL;
3119   b->imax = NULL;
3120   b->row  = perm;
3121   b->col  = perm;
3122 
3123   PetscCall(PetscObjectReference((PetscObject)perm));
3124   PetscCall(PetscObjectReference((PetscObject)perm));
3125 
3126   b->icol          = iperm;
3127   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3128 
3129   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3130   b->maxnz = b->nz = ui[am];
3131 
3132   fact->info.factor_mallocs   = reallocs;
3133   fact->info.fill_ratio_given = fill;
3134   if (ai[am] != 0) {
3135     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3136   } else {
3137     fact->info.fill_ratio_needed = 0.0;
3138   }
3139   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3140   PetscFunctionReturn(PETSC_SUCCESS);
3141 }
3142 #endif
3143 
3144 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3145 {
3146   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3147   PetscInt           n  = A->rmap->n;
3148   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3149   PetscScalar       *x, sum;
3150   const PetscScalar *b;
3151   const MatScalar   *aa = a->a, *v;
3152   PetscInt           i, nz;
3153 
3154   PetscFunctionBegin;
3155   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3156 
3157   PetscCall(VecGetArrayRead(bb, &b));
3158   PetscCall(VecGetArrayWrite(xx, &x));
3159 
3160   /* forward solve the lower triangular */
3161   x[0] = b[0];
3162   v    = aa;
3163   vi   = aj;
3164   for (i = 1; i < n; i++) {
3165     nz  = ai[i + 1] - ai[i];
3166     sum = b[i];
3167     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3168     v += nz;
3169     vi += nz;
3170     x[i] = sum;
3171   }
3172 
3173   /* backward solve the upper triangular */
3174   for (i = n - 1; i >= 0; i--) {
3175     v   = aa + adiag[i + 1] + 1;
3176     vi  = aj + adiag[i + 1] + 1;
3177     nz  = adiag[i] - adiag[i + 1] - 1;
3178     sum = x[i];
3179     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3180     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3181   }
3182 
3183   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3184   PetscCall(VecRestoreArrayRead(bb, &b));
3185   PetscCall(VecRestoreArrayWrite(xx, &x));
3186   PetscFunctionReturn(PETSC_SUCCESS);
3187 }
3188 
3189 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3190 {
3191   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3192   IS                 iscol = a->col, isrow = a->row;
3193   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3194   const PetscInt    *rout, *cout, *r, *c;
3195   PetscScalar       *x, *tmp, sum;
3196   const PetscScalar *b;
3197   const MatScalar   *aa = a->a, *v;
3198 
3199   PetscFunctionBegin;
3200   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3201 
3202   PetscCall(VecGetArrayRead(bb, &b));
3203   PetscCall(VecGetArrayWrite(xx, &x));
3204   tmp = a->solve_work;
3205 
3206   PetscCall(ISGetIndices(isrow, &rout));
3207   r = rout;
3208   PetscCall(ISGetIndices(iscol, &cout));
3209   c = cout;
3210 
3211   /* forward solve the lower triangular */
3212   tmp[0] = b[r[0]];
3213   v      = aa;
3214   vi     = aj;
3215   for (i = 1; i < n; i++) {
3216     nz  = ai[i + 1] - ai[i];
3217     sum = b[r[i]];
3218     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3219     tmp[i] = sum;
3220     v += nz;
3221     vi += nz;
3222   }
3223 
3224   /* backward solve the upper triangular */
3225   for (i = n - 1; i >= 0; i--) {
3226     v   = aa + adiag[i + 1] + 1;
3227     vi  = aj + adiag[i + 1] + 1;
3228     nz  = adiag[i] - adiag[i + 1] - 1;
3229     sum = tmp[i];
3230     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3231     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3232   }
3233 
3234   PetscCall(ISRestoreIndices(isrow, &rout));
3235   PetscCall(ISRestoreIndices(iscol, &cout));
3236   PetscCall(VecRestoreArrayRead(bb, &b));
3237   PetscCall(VecRestoreArrayWrite(xx, &x));
3238   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3239   PetscFunctionReturn(PETSC_SUCCESS);
3240 }
3241 
3242 #if 0
3243 // unused
3244 /*
3245     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3246 */
3247 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3248 {
3249   Mat             B = *fact;
3250   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3251   IS              isicol;
3252   const PetscInt *r, *ic;
3253   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3254   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3255   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3256   PetscInt        nlnk, *lnk;
3257   PetscBT         lnkbt;
3258   PetscBool       row_identity, icol_identity;
3259   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3260   const PetscInt *ics;
3261   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3262   PetscReal       dt = info->dt, shift = info->shiftamount;
3263   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3264   PetscBool       missing;
3265 
3266   PetscFunctionBegin;
3267   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3268   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3269 
3270   /* symbolic factorization, can be reused */
3271   PetscCall(MatMissingDiagonal(A, &missing, &i));
3272   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3273   adiag = a->diag;
3274 
3275   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3276 
3277   /* bdiag is location of diagonal in factor */
3278   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3279   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3280 
3281   /* allocate row pointers bi */
3282   PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));
3283 
3284   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3285   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3286   nnz_max = ai[n] + 2 * n * dtcount + 2;
3287 
3288   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
3289   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));
3290 
3291   /* put together the new matrix */
3292   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3293   b = (Mat_SeqAIJ *)B->data;
3294   b->free_a       = PETSC_TRUE;
3295   b->free_ij      = PETSC_TRUE;
3296   b->a    = ba;
3297   b->j    = bj;
3298   b->i    = bi;
3299   b->diag = bdiag;
3300   b->ilen = NULL;
3301   b->imax = NULL;
3302   b->row  = isrow;
3303   b->col  = iscol;
3304   PetscCall(PetscObjectReference((PetscObject)isrow));
3305   PetscCall(PetscObjectReference((PetscObject)iscol));
3306   b->icol = isicol;
3307 
3308   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3309   b->maxnz = nnz_max;
3310 
3311   B->factortype            = MAT_FACTOR_ILUDT;
3312   B->info.factor_mallocs   = 0;
3313   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3314   /*  end of symbolic factorization */
3315 
3316   PetscCall(ISGetIndices(isrow, &r));
3317   PetscCall(ISGetIndices(isicol, &ic));
3318   ics = ic;
3319 
3320   /* linked list for storing column indices of the active row */
3321   nlnk = n + 1;
3322   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3323 
3324   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3325   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3326   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3327   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3328   PetscCall(PetscArrayzero(rtmp, n));
3329 
3330   bi[0]         = 0;
3331   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3332   bdiag_rev[n]  = bdiag[0];
3333   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3334   for (i = 0; i < n; i++) {
3335     /* copy initial fill into linked list */
3336     nzi = ai[r[i] + 1] - ai[r[i]];
3337     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3338     nzi_al = adiag[r[i]] - ai[r[i]];
3339     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3340     ajtmp  = aj + ai[r[i]];
3341     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3342 
3343     /* load in initial (unfactored row) */
3344     aatmp = a->a + ai[r[i]];
3345     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3346 
3347     /* add pivot rows into linked list */
3348     row = lnk[n];
3349     while (row < i) {
3350       nzi_bl = bi[row + 1] - bi[row] + 1;
3351       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3352       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3353       nzi += nlnk;
3354       row = lnk[row];
3355     }
3356 
3357     /* copy data from lnk into jtmp, then initialize lnk */
3358     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3359 
3360     /* numerical factorization */
3361     bjtmp = jtmp;
3362     row   = *bjtmp++; /* 1st pivot row */
3363     while (row < i) {
3364       pc         = rtmp + row;
3365       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3366       multiplier = (*pc) * (*pv);
3367       *pc        = multiplier;
3368       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3369         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3370         pv = ba + bdiag[row + 1] + 1;
3371         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3372         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3373         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3374       }
3375       row = *bjtmp++;
3376     }
3377 
3378     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3379     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3380     nzi_bl   = 0;
3381     j        = 0;
3382     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3383       vtmp[j]       = rtmp[jtmp[j]];
3384       rtmp[jtmp[j]] = 0.0;
3385       nzi_bl++;
3386       j++;
3387     }
3388     nzi_bu = nzi - nzi_bl - 1;
3389     while (j < nzi) {
3390       vtmp[j]       = rtmp[jtmp[j]];
3391       rtmp[jtmp[j]] = 0.0;
3392       j++;
3393     }
3394 
3395     bjtmp = bj + bi[i];
3396     batmp = ba + bi[i];
3397     /* apply level dropping rule to L part */
3398     ncut = nzi_al + dtcount;
3399     if (ncut < nzi_bl) {
3400       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3401       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3402     } else {
3403       ncut = nzi_bl;
3404     }
3405     for (j = 0; j < ncut; j++) {
3406       bjtmp[j] = jtmp[j];
3407       batmp[j] = vtmp[j];
3408     }
3409     bi[i + 1] = bi[i] + ncut;
3410     nzi       = ncut + 1;
3411 
3412     /* apply level dropping rule to U part */
3413     ncut = nzi_au + dtcount;
3414     if (ncut < nzi_bu) {
3415       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3416       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3417     } else {
3418       ncut = nzi_bu;
3419     }
3420     nzi += ncut;
3421 
3422     /* mark bdiagonal */
3423     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3424     bdiag_rev[n - i - 1] = bdiag[i + 1];
3425     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3426     bjtmp                = bj + bdiag[i];
3427     batmp                = ba + bdiag[i];
3428     *bjtmp               = i;
3429     *batmp               = diag_tmp; /* rtmp[i]; */
3430     if (*batmp == 0.0) *batmp = dt + shift;
3431     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3432 
3433     bjtmp = bj + bdiag[i + 1] + 1;
3434     batmp = ba + bdiag[i + 1] + 1;
3435     for (k = 0; k < ncut; k++) {
3436       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3437       batmp[k] = vtmp[nzi_bl + 1 + k];
3438     }
3439 
3440     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3441   }              /* for (i=0; i<n; i++) */
3442   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3443 
3444   PetscCall(ISRestoreIndices(isrow, &r));
3445   PetscCall(ISRestoreIndices(isicol, &ic));
3446 
3447   PetscCall(PetscLLDestroy(lnk, lnkbt));
3448   PetscCall(PetscFree2(im, jtmp));
3449   PetscCall(PetscFree2(rtmp, vtmp));
3450   PetscCall(PetscFree(bdiag_rev));
3451 
3452   PetscCall(PetscLogFlops(B->cmap->n));
3453   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3454 
3455   PetscCall(ISIdentity(isrow, &row_identity));
3456   PetscCall(ISIdentity(isicol, &icol_identity));
3457   if (row_identity && icol_identity) {
3458     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3459   } else {
3460     B->ops->solve = MatSolve_SeqAIJ;
3461   }
3462 
3463   B->ops->solveadd          = NULL;
3464   B->ops->solvetranspose    = NULL;
3465   B->ops->solvetransposeadd = NULL;
3466   B->ops->matsolve          = NULL;
3467   B->assembled              = PETSC_TRUE;
3468   B->preallocated           = PETSC_TRUE;
3469   PetscFunctionReturn(PETSC_SUCCESS);
3470 }
3471 
3472 /* a wrapper of MatILUDTFactor_SeqAIJ() */
3473 /*
3474     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3475 */
3476 
3477 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3478 {
3479   PetscFunctionBegin;
3480   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3481   PetscFunctionReturn(PETSC_SUCCESS);
3482 }
3483 
3484 /*
3485    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3486    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3487 */
3488 /*
3489     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3490 */
3491 
3492 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3493 {
3494   Mat             C = fact;
3495   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3496   IS              isrow = b->row, isicol = b->icol;
3497   const PetscInt *r, *ic, *ics;
3498   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3499   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3500   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3501   PetscReal       dt = info->dt, shift = info->shiftamount;
3502   PetscBool       row_identity, col_identity;
3503 
3504   PetscFunctionBegin;
3505   PetscCall(ISGetIndices(isrow, &r));
3506   PetscCall(ISGetIndices(isicol, &ic));
3507   PetscCall(PetscMalloc1(n + 1, &rtmp));
3508   ics = ic;
3509 
3510   for (i = 0; i < n; i++) {
3511     /* initialize rtmp array */
3512     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3513     bjtmp = bj + bi[i];
3514     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3515     rtmp[i] = 0.0;
3516     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3517     bjtmp   = bj + bdiag[i + 1] + 1;
3518     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3519 
3520     /* load in initial unfactored row of A */
3521     nz    = ai[r[i] + 1] - ai[r[i]];
3522     ajtmp = aj + ai[r[i]];
3523     v     = aa + ai[r[i]];
3524     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3525 
3526     /* numerical factorization */
3527     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3528     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3529     k     = 0;
3530     while (k < nzl) {
3531       row        = *bjtmp++;
3532       pc         = rtmp + row;
3533       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3534       multiplier = (*pc) * (*pv);
3535       *pc        = multiplier;
3536       if (PetscAbsScalar(multiplier) > dt) {
3537         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3538         pv = b->a + bdiag[row + 1] + 1;
3539         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3540         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3541         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3542       }
3543       k++;
3544     }
3545 
3546     /* finished row so stick it into b->a */
3547     /* L-part */
3548     pv  = b->a + bi[i];
3549     pj  = bj + bi[i];
3550     nzl = bi[i + 1] - bi[i];
3551     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3552 
3553     /* diagonal: invert diagonal entries for simpler triangular solves */
3554     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3555     b->a[bdiag[i]] = 1.0 / rtmp[i];
3556 
3557     /* U-part */
3558     pv  = b->a + bdiag[i + 1] + 1;
3559     pj  = bj + bdiag[i + 1] + 1;
3560     nzu = bdiag[i] - bdiag[i + 1] - 1;
3561     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3562   }
3563 
3564   PetscCall(PetscFree(rtmp));
3565   PetscCall(ISRestoreIndices(isicol, &ic));
3566   PetscCall(ISRestoreIndices(isrow, &r));
3567 
3568   PetscCall(ISIdentity(isrow, &row_identity));
3569   PetscCall(ISIdentity(isicol, &col_identity));
3570   if (row_identity && col_identity) {
3571     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3572   } else {
3573     C->ops->solve = MatSolve_SeqAIJ;
3574   }
3575   C->ops->solveadd          = NULL;
3576   C->ops->solvetranspose    = NULL;
3577   C->ops->solvetransposeadd = NULL;
3578   C->ops->matsolve          = NULL;
3579   C->assembled              = PETSC_TRUE;
3580   C->preallocated           = PETSC_TRUE;
3581 
3582   PetscCall(PetscLogFlops(C->cmap->n));
3583   PetscFunctionReturn(PETSC_SUCCESS);
3584 }
3585 #endif
3586