xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision bcd4bb4a4158aa96f212e9537e87b40407faf83e)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
6 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
7 {
8   PetscFunctionBegin;
9   *type = MATSOLVERPETSC;
10   PetscFunctionReturn(PETSC_SUCCESS);
11 }
12 
13 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
14 {
15   PetscInt n = A->rmap->n;
16 
17   PetscFunctionBegin;
18 #if defined(PETSC_USE_COMPLEX)
19   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
20     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
21     *B = NULL;
22     PetscFunctionReturn(PETSC_SUCCESS);
23   }
24 #endif
25 
26   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
27   PetscCall(MatSetSizes(*B, n, n, n, n));
28   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
29     PetscCall(MatSetType(*B, MATSEQAIJ));
30 
31     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
32     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
33 
34     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
35     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
36     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
37     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
38   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
39     PetscCall(MatSetType(*B, MATSEQSBAIJ));
40     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
41 
42     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
43     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
44     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
45     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
46   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
47   (*B)->factortype = ftype;
48 
49   PetscCall(PetscFree((*B)->solvertype));
50   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
51   (*B)->canuseordering = PETSC_TRUE;
52   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
56 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
57 {
58   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
59   IS                 isicol;
60   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
61   PetscInt           i, n = A->rmap->n;
62   PetscInt          *bi, *bj;
63   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
64   PetscReal          f;
65   PetscInt           nlnk, *lnk, k, **bi_ptr;
66   PetscFreeSpaceList free_space = NULL, current_space = NULL;
67   PetscBT            lnkbt;
68   PetscBool          diagDense;
69 
70   PetscFunctionBegin;
71   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
72   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
73   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
74 
75   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
76   PetscCall(ISGetIndices(isrow, &r));
77   PetscCall(ISGetIndices(isicol, &ic));
78 
79   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
80   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
81   PetscCall(PetscMalloc1(n + 1, &bdiag));
82   bi[0] = bdiag[0] = 0;
83 
84   /* linked list for storing column indices of the active row */
85   nlnk = n + 1;
86   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
87 
88   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
89 
90   /* initial FreeSpace size is f*(ai[n]+1) */
91   f = info->fill;
92   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
93   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
94   current_space = free_space;
95 
96   for (i = 0; i < n; i++) {
97     /* copy previous fill into linked list */
98     nzi   = 0;
99     nnz   = ai[r[i] + 1] - ai[r[i]];
100     ajtmp = aj + ai[r[i]];
101     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
102     nzi += nlnk;
103 
104     /* add pivot rows into linked list */
105     row = lnk[n];
106     while (row < i) {
107       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
108       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
109       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
110       nzi += nlnk;
111       row = lnk[row];
112     }
113     bi[i + 1] = bi[i] + nzi;
114     im[i]     = nzi;
115 
116     /* mark bdiag */
117     nzbd = 0;
118     nnz  = nzi;
119     k    = lnk[n];
120     while (nnz-- && k < i) {
121       nzbd++;
122       k = lnk[k];
123     }
124     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
125 
126     /* if free space is not available, make more free space */
127     if (current_space->local_remaining < nzi) {
128       /* estimated additional space needed */
129       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
130       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
131       reallocs++;
132     }
133 
134     /* copy data into free space, then initialize lnk */
135     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
136 
137     bi_ptr[i] = current_space->array;
138     current_space->array += nzi;
139     current_space->local_used += nzi;
140     current_space->local_remaining -= nzi;
141   }
142 
143   PetscCall(ISRestoreIndices(isrow, &r));
144   PetscCall(ISRestoreIndices(isicol, &ic));
145 
146   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
147   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
148   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
149   PetscCall(PetscLLDestroy(lnk, lnkbt));
150   PetscCall(PetscFree2(bi_ptr, im));
151 
152   /* put together the new matrix */
153   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
154   b          = (Mat_SeqAIJ *)B->data;
155   b->free_ij = PETSC_TRUE;
156   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
157   b->free_a = PETSC_TRUE;
158   b->j      = bj;
159   b->i      = bi;
160   b->diag   = bdiag;
161   b->ilen   = NULL;
162   b->imax   = NULL;
163   b->row    = isrow;
164   b->col    = iscol;
165   PetscCall(PetscObjectReference((PetscObject)isrow));
166   PetscCall(PetscObjectReference((PetscObject)iscol));
167   b->icol = isicol;
168   PetscCall(PetscMalloc1(n, &b->solve_work));
169 
170   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
171   b->maxnz = b->nz = bdiag[0] + 1;
172 
173   B->factortype            = MAT_FACTOR_LU;
174   B->info.factor_mallocs   = reallocs;
175   B->info.fill_ratio_given = f;
176 
177   if (ai[n]) {
178     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
179   } else {
180     B->info.fill_ratio_needed = 0.0;
181   }
182 #if defined(PETSC_USE_INFO)
183   if (ai[n] != 0) {
184     PetscReal af = B->info.fill_ratio_needed;
185     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
186     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
187     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
188     PetscCall(PetscInfo(A, "for best performance.\n"));
189   } else {
190     PetscCall(PetscInfo(A, "Empty matrix\n"));
191   }
192 #endif
193   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
194   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
195   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
196   PetscFunctionReturn(PETSC_SUCCESS);
197 }
198 
199 /*
200     Trouble in factorization, should we dump the original matrix?
201 */
202 PetscErrorCode MatFactorDumpMatrix(Mat A)
203 {
204   PetscBool flg = PETSC_FALSE;
205 
206   PetscFunctionBegin;
207   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
208   if (flg) {
209     PetscViewer viewer;
210     char        filename[PETSC_MAX_PATH_LEN];
211 
212     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
213     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
214     PetscCall(MatView(A, viewer));
215     PetscCall(PetscViewerDestroy(&viewer));
216   }
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219 
220 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
221 {
222   Mat              C = B;
223   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
224   IS               isrow = b->row, isicol = b->icol;
225   const PetscInt  *r, *ic, *ics;
226   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
227   PetscInt         i, j, k, nz, nzL, row, *pj;
228   const PetscInt  *ajtmp, *bjtmp;
229   MatScalar       *rtmp, *pc, multiplier, *pv;
230   const MatScalar *aa, *v;
231   MatScalar       *ba;
232   PetscBool        row_identity, col_identity;
233   FactorShiftCtx   sctx;
234   const PetscInt  *ddiag;
235   PetscReal        rs;
236   MatScalar        d;
237 
238   PetscFunctionBegin;
239   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
240   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
241   /* MatPivotSetUp(): initialize shift context sctx */
242   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
243 
244   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
245     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
246     sctx.shift_top = info->zeropivot;
247     for (i = 0; i < n; i++) {
248       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
249       d  = aa[ddiag[i]];
250       rs = -PetscAbsScalar(d) - PetscRealPart(d);
251       v  = aa + ai[i];
252       nz = ai[i + 1] - ai[i];
253       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
254       if (rs > sctx.shift_top) sctx.shift_top = rs;
255     }
256     sctx.shift_top *= 1.1;
257     sctx.nshift_max = 5;
258     sctx.shift_lo   = 0.;
259     sctx.shift_hi   = 1.;
260   }
261 
262   PetscCall(ISGetIndices(isrow, &r));
263   PetscCall(ISGetIndices(isicol, &ic));
264   PetscCall(PetscMalloc1(n + 1, &rtmp));
265   ics = ic;
266 
267   do {
268     sctx.newshift = PETSC_FALSE;
269     for (i = 0; i < n; i++) {
270       /* zero rtmp */
271       /* L part */
272       nz    = bi[i + 1] - bi[i];
273       bjtmp = bj + bi[i];
274       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
275 
276       /* U part */
277       nz    = bdiag[i] - bdiag[i + 1];
278       bjtmp = bj + bdiag[i + 1] + 1;
279       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
280 
281       /* load in initial (unfactored row) */
282       nz    = ai[r[i] + 1] - ai[r[i]];
283       ajtmp = aj + ai[r[i]];
284       v     = aa + ai[r[i]];
285       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
286       /* ZeropivotApply() */
287       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
288 
289       /* elimination */
290       bjtmp = bj + bi[i];
291       row   = *bjtmp++;
292       nzL   = bi[i + 1] - bi[i];
293       for (k = 0; k < nzL; k++) {
294         pc = rtmp + row;
295         if (*pc != 0.0) {
296           pv         = ba + bdiag[row];
297           multiplier = *pc * (*pv);
298           *pc        = multiplier;
299 
300           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
301           pv = ba + bdiag[row + 1] + 1;
302           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
303 
304           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
305           PetscCall(PetscLogFlops(1 + 2.0 * nz));
306         }
307         row = *bjtmp++;
308       }
309 
310       /* finished row so stick it into b->a */
311       rs = 0.0;
312       /* L part */
313       pv = ba + bi[i];
314       pj = b->j + bi[i];
315       nz = bi[i + 1] - bi[i];
316       for (j = 0; j < nz; j++) {
317         pv[j] = rtmp[pj[j]];
318         rs += PetscAbsScalar(pv[j]);
319       }
320 
321       /* U part */
322       pv = ba + bdiag[i + 1] + 1;
323       pj = b->j + bdiag[i + 1] + 1;
324       nz = bdiag[i] - bdiag[i + 1] - 1;
325       for (j = 0; j < nz; j++) {
326         pv[j] = rtmp[pj[j]];
327         rs += PetscAbsScalar(pv[j]);
328       }
329 
330       sctx.rs = rs;
331       sctx.pv = rtmp[i];
332       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
333       if (sctx.newshift) break; /* break for-loop */
334       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
335 
336       /* Mark diagonal and invert diagonal for simpler triangular solves */
337       pv  = ba + bdiag[i];
338       *pv = 1.0 / rtmp[i];
339 
340     } /* endof for (i=0; i<n; i++) { */
341 
342     /* MatPivotRefine() */
343     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
344       /*
345        * if no shift in this attempt & shifting & started shifting & can refine,
346        * then try lower shift
347        */
348       sctx.shift_hi       = sctx.shift_fraction;
349       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
350       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
351       sctx.newshift       = PETSC_TRUE;
352       sctx.nshift++;
353     }
354   } while (sctx.newshift);
355 
356   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
357   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
358 
359   PetscCall(PetscFree(rtmp));
360   PetscCall(ISRestoreIndices(isicol, &ic));
361   PetscCall(ISRestoreIndices(isrow, &r));
362 
363   PetscCall(ISIdentity(isrow, &row_identity));
364   PetscCall(ISIdentity(isicol, &col_identity));
365   if (b->inode.size_csr) {
366     C->ops->solve = MatSolve_SeqAIJ_Inode;
367   } else if (row_identity && col_identity) {
368     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
369   } else {
370     C->ops->solve = MatSolve_SeqAIJ;
371   }
372   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
373   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
374   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
375   C->ops->matsolve          = MatMatSolve_SeqAIJ;
376   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
377   C->assembled              = PETSC_TRUE;
378   C->preallocated           = PETSC_TRUE;
379 
380   PetscCall(PetscLogFlops(C->cmap->n));
381 
382   /* MatShiftView(A,info,&sctx) */
383   if (sctx.nshift) {
384     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
385       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
386     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
387       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
388     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
389       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
390     }
391   }
392   PetscFunctionReturn(PETSC_SUCCESS);
393 }
394 
395 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
396 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
397 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
398 
399 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
400 {
401   Mat              C = B;
402   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
403   IS               isrow = b->row, isicol = b->icol;
404   const PetscInt  *r, *ic, *ics;
405   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
406   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
407   const PetscInt  *ajtmp, *bjtmp, *ddiag, *pj;
408   MatScalar       *pv, *rtmp, *pc, multiplier, d;
409   const MatScalar *v, *aa;
410   MatScalar       *ba;
411   PetscReal        rs = 0.0;
412   FactorShiftCtx   sctx;
413   PetscBool        row_identity, col_identity;
414 
415   PetscFunctionBegin;
416   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
417 
418   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
419   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
420   /* MatPivotSetUp(): initialize shift context sctx */
421   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
422 
423   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
424     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
425     sctx.shift_top = info->zeropivot;
426     for (i = 0; i < n; i++) {
427       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
428       d  = aa[ddiag[i]];
429       rs = -PetscAbsScalar(d) - PetscRealPart(d);
430       v  = aa + ai[i];
431       nz = ai[i + 1] - ai[i];
432       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
433       if (rs > sctx.shift_top) sctx.shift_top = rs;
434     }
435     sctx.shift_top *= 1.1;
436     sctx.nshift_max = 5;
437     sctx.shift_lo   = 0.;
438     sctx.shift_hi   = 1.;
439   }
440 
441   PetscCall(ISGetIndices(isrow, &r));
442   PetscCall(ISGetIndices(isicol, &ic));
443   PetscCall(PetscMalloc1(n + 1, &rtmp));
444   ics = ic;
445 
446   do {
447     sctx.newshift = PETSC_FALSE;
448     for (i = 0; i < n; i++) {
449       nz    = bi[i + 1] - bi[i];
450       bjtmp = bj + bi[i];
451       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
452 
453       /* load in initial (unfactored row) */
454       nz    = ai[r[i] + 1] - ai[r[i]];
455       ajtmp = aj + ai[r[i]];
456       v     = aa + ai[r[i]];
457       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
458       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
459 
460       row = *bjtmp++;
461       while (row < i) {
462         pc = rtmp + row;
463         if (*pc != 0.0) {
464           pv         = ba + ddiag[row];
465           pj         = b->j + ddiag[row] + 1;
466           multiplier = *pc / *pv++;
467           *pc        = multiplier;
468           nz         = bi[row + 1] - ddiag[row] - 1;
469           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
470           PetscCall(PetscLogFlops(1 + 2.0 * nz));
471         }
472         row = *bjtmp++;
473       }
474       /* finished row so stick it into b->a */
475       pv   = ba + bi[i];
476       pj   = b->j + bi[i];
477       nz   = bi[i + 1] - bi[i];
478       diag = ddiag[i] - bi[i];
479       rs   = 0.0;
480       for (j = 0; j < nz; j++) {
481         pv[j] = rtmp[pj[j]];
482         rs += PetscAbsScalar(pv[j]);
483       }
484       rs -= PetscAbsScalar(pv[diag]);
485 
486       sctx.rs = rs;
487       sctx.pv = pv[diag];
488       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
489       if (sctx.newshift) break;
490       pv[diag] = sctx.pv;
491     }
492 
493     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
494       /*
495        * if no shift in this attempt & shifting & started shifting & can refine,
496        * then try lower shift
497        */
498       sctx.shift_hi       = sctx.shift_fraction;
499       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
500       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
501       sctx.newshift       = PETSC_TRUE;
502       sctx.nshift++;
503     }
504   } while (sctx.newshift);
505 
506   /* invert diagonal entries for simpler triangular solves */
507   for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
508   PetscCall(PetscFree(rtmp));
509   PetscCall(ISRestoreIndices(isicol, &ic));
510   PetscCall(ISRestoreIndices(isrow, &r));
511   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
512   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
513 
514   PetscCall(ISIdentity(isrow, &row_identity));
515   PetscCall(ISIdentity(isicol, &col_identity));
516   if (row_identity && col_identity) {
517     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
518   } else {
519     C->ops->solve = MatSolve_SeqAIJ_inplace;
520   }
521   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
522   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
523   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
524   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
525   C->ops->matsolvetranspose = NULL;
526 
527   C->assembled    = PETSC_TRUE;
528   C->preallocated = PETSC_TRUE;
529 
530   PetscCall(PetscLogFlops(C->cmap->n));
531   if (sctx.nshift) {
532     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
533       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
534     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
535       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
536     }
537   }
538   C->ops->solve          = MatSolve_SeqAIJ_inplace;
539   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
540 
541   PetscCall(MatSeqAIJCheckInode(C));
542   PetscFunctionReturn(PETSC_SUCCESS);
543 }
544 
545 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
546 
547 /*
548    This routine implements inplace ILU(0) with row or/and column permutations.
549    Input:
550      A - original matrix
551    Output;
552      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
553          a->j (col index) is permuted by the inverse of colperm, then sorted
554          a->a reordered accordingly with a->j
555          a->diag (ptr to diagonal elements) is updated.
556 */
557 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
558 {
559   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
560   IS              isrow = a->row, isicol = a->icol;
561   const PetscInt *r, *ic, *ics;
562   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
563   PetscInt       *ajtmp, nz, row;
564   PetscInt        nbdiag, *pj;
565   PetscScalar    *rtmp, *pc, multiplier, d;
566   MatScalar      *pv, *v;
567   PetscReal       rs;
568   FactorShiftCtx  sctx;
569   MatScalar      *aa, *vtmp;
570   PetscInt       *diag;
571 
572   PetscFunctionBegin;
573   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
574 
575   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
576   PetscCall(MatSeqAIJGetArray(A, &aa));
577   /* MatPivotSetUp(): initialize shift context sctx */
578   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
579 
580   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
581     const PetscInt *ddiag;
582 
583     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
584     sctx.shift_top = info->zeropivot;
585     for (i = 0; i < n; i++) {
586       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
587       d    = aa[ddiag[i]];
588       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
589       vtmp = aa + ai[i];
590       nz   = ai[i + 1] - ai[i];
591       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
592       if (rs > sctx.shift_top) sctx.shift_top = rs;
593     }
594     sctx.shift_top *= 1.1;
595     sctx.nshift_max = 5;
596     sctx.shift_lo   = 0.;
597     sctx.shift_hi   = 1.;
598   }
599 
600   PetscCall(ISGetIndices(isrow, &r));
601   PetscCall(ISGetIndices(isicol, &ic));
602   PetscCall(PetscMalloc1(n + 1, &rtmp));
603   PetscCall(PetscArrayzero(rtmp, n + 1));
604   ics = ic;
605 
606 #if defined(MV)
607   sctx.shift_top      = 0.;
608   sctx.nshift_max     = 0;
609   sctx.shift_lo       = 0.;
610   sctx.shift_hi       = 0.;
611   sctx.shift_fraction = 0.;
612 
613   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
614     sctx.shift_top = 0.;
615     for (i = 0; i < n; i++) {
616       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
617       d  = aa[diag[i]];
618       rs = -PetscAbsScalar(d) - PetscRealPart(d);
619       v  = aa + ai[i];
620       nz = ai[i + 1] - ai[i];
621       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
622       if (rs > sctx.shift_top) sctx.shift_top = rs;
623     }
624     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
625     sctx.shift_top *= 1.1;
626     sctx.nshift_max = 5;
627     sctx.shift_lo   = 0.;
628     sctx.shift_hi   = 1.;
629   }
630 
631   sctx.shift_amount = 0.;
632   sctx.nshift       = 0;
633 #endif
634 
635   do {
636     sctx.newshift = PETSC_FALSE;
637     for (i = 0; i < n; i++) {
638       /* load in initial unfactored row */
639       nz    = ai[r[i] + 1] - ai[r[i]];
640       ajtmp = aj + ai[r[i]];
641       v     = aa + ai[r[i]];
642       /* sort permuted ajtmp and values v accordingly */
643       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
644       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
645 
646       diag[r[i]] = ai[r[i]];
647       for (j = 0; j < nz; j++) {
648         rtmp[ajtmp[j]] = v[j];
649         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
650       }
651       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
652 
653       row = *ajtmp++;
654       while (row < i) {
655         pc = rtmp + row;
656         if (*pc != 0.0) {
657           pv = aa + diag[r[row]];
658           pj = aj + diag[r[row]] + 1;
659 
660           multiplier = *pc / *pv++;
661           *pc        = multiplier;
662           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
663           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
664           PetscCall(PetscLogFlops(1 + 2.0 * nz));
665         }
666         row = *ajtmp++;
667       }
668       /* finished row so overwrite it onto aa */
669       pv     = aa + ai[r[i]];
670       pj     = aj + ai[r[i]];
671       nz     = ai[r[i] + 1] - ai[r[i]];
672       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
673 
674       rs = 0.0;
675       for (j = 0; j < nz; j++) {
676         pv[j] = rtmp[pj[j]];
677         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
678       }
679 
680       sctx.rs = rs;
681       sctx.pv = pv[nbdiag];
682       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
683       if (sctx.newshift) break;
684       pv[nbdiag] = sctx.pv;
685     }
686 
687     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
688       /*
689        * if no shift in this attempt & shifting & started shifting & can refine,
690        * then try lower shift
691        */
692       sctx.shift_hi       = sctx.shift_fraction;
693       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
694       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
695       sctx.newshift       = PETSC_TRUE;
696       sctx.nshift++;
697     }
698   } while (sctx.newshift);
699 
700   /* invert diagonal entries for simpler triangular solves */
701   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
702 
703   PetscCall(MatSeqAIJRestoreArray(A, &aa));
704   PetscCall(PetscFree(rtmp));
705   PetscCall(ISRestoreIndices(isicol, &ic));
706   PetscCall(ISRestoreIndices(isrow, &r));
707 
708   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
709   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
710   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
711   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
712 
713   A->assembled    = PETSC_TRUE;
714   A->preallocated = PETSC_TRUE;
715 
716   PetscCall(PetscLogFlops(A->cmap->n));
717   if (sctx.nshift) {
718     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
719       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
720     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
721       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
722     }
723   }
724   PetscFunctionReturn(PETSC_SUCCESS);
725 }
726 
727 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
728 {
729   Mat C;
730 
731   PetscFunctionBegin;
732   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
733   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
734   PetscCall(MatLUFactorNumeric(C, A, info));
735 
736   A->ops->solve          = C->ops->solve;
737   A->ops->solvetranspose = C->ops->solvetranspose;
738 
739   PetscCall(MatHeaderMerge(A, &C));
740   PetscFunctionReturn(PETSC_SUCCESS);
741 }
742 
743 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
744 {
745   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
746   IS                 iscol = a->col, isrow = a->row;
747   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
748   PetscInt           nz;
749   const PetscInt    *rout, *cout, *r, *c;
750   PetscScalar       *x, *tmp, *tmps, sum;
751   const PetscScalar *b;
752   const MatScalar   *aa, *v;
753   const PetscInt    *adiag;
754 
755   PetscFunctionBegin;
756   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
757 
758   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
759   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
760   PetscCall(VecGetArrayRead(bb, &b));
761   PetscCall(VecGetArrayWrite(xx, &x));
762   tmp = a->solve_work;
763 
764   PetscCall(ISGetIndices(isrow, &rout));
765   r = rout;
766   PetscCall(ISGetIndices(iscol, &cout));
767   c = cout + (n - 1);
768 
769   /* forward solve the lower triangular */
770   tmp[0] = b[*r++];
771   tmps   = tmp;
772   for (i = 1; i < n; i++) {
773     v   = aa + ai[i];
774     vi  = aj + ai[i];
775     nz  = adiag[i] - ai[i];
776     sum = b[*r++];
777     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
778     tmp[i] = sum;
779   }
780 
781   /* backward solve the upper triangular */
782   for (i = n - 1; i >= 0; i--) {
783     v   = aa + adiag[i] + 1;
784     vi  = aj + adiag[i] + 1;
785     nz  = ai[i + 1] - adiag[i] - 1;
786     sum = tmp[i];
787     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
788     x[*c--] = tmp[i] = sum * aa[adiag[i]];
789   }
790   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
791   PetscCall(ISRestoreIndices(isrow, &rout));
792   PetscCall(ISRestoreIndices(iscol, &cout));
793   PetscCall(VecRestoreArrayRead(bb, &b));
794   PetscCall(VecRestoreArrayWrite(xx, &x));
795   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
796   PetscFunctionReturn(PETSC_SUCCESS);
797 }
798 
799 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
800 {
801   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
802   IS                 iscol = a->col, isrow = a->row;
803   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
804   PetscInt           nz, neq, ldb, ldx;
805   const PetscInt    *rout, *cout, *r, *c;
806   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
807   const PetscScalar *b, *aa, *v;
808   PetscBool          isdense;
809   const PetscInt    *adiag;
810 
811   PetscFunctionBegin;
812   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
813   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
814   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
815   if (X != B) {
816     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
817     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
818   }
819   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
820   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
821   PetscCall(MatDenseGetArrayRead(B, &b));
822   PetscCall(MatDenseGetLDA(B, &ldb));
823   PetscCall(MatDenseGetArray(X, &x));
824   PetscCall(MatDenseGetLDA(X, &ldx));
825   PetscCall(ISGetIndices(isrow, &rout));
826   r = rout;
827   PetscCall(ISGetIndices(iscol, &cout));
828   c = cout;
829   for (neq = 0; neq < B->cmap->n; neq++) {
830     /* forward solve the lower triangular */
831     tmp[0] = b[r[0]];
832     tmps   = tmp;
833     for (i = 1; i < n; i++) {
834       v   = aa + ai[i];
835       vi  = aj + ai[i];
836       nz  = adiag[i] - ai[i];
837       sum = b[r[i]];
838       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
839       tmp[i] = sum;
840     }
841     /* backward solve the upper triangular */
842     for (i = n - 1; i >= 0; i--) {
843       v   = aa + adiag[i] + 1;
844       vi  = aj + adiag[i] + 1;
845       nz  = ai[i + 1] - adiag[i] - 1;
846       sum = tmp[i];
847       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
848       x[c[i]] = tmp[i] = sum * aa[adiag[i]];
849     }
850     b += ldb;
851     x += ldx;
852   }
853   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
854   PetscCall(ISRestoreIndices(isrow, &rout));
855   PetscCall(ISRestoreIndices(iscol, &cout));
856   PetscCall(MatDenseRestoreArrayRead(B, &b));
857   PetscCall(MatDenseRestoreArray(X, &x));
858   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
863 {
864   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
865   IS                 iscol = a->col, isrow = a->row;
866   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
867   const PetscInt    *adiag;
868   PetscInt           nz, neq, ldb, ldx;
869   const PetscInt    *rout, *cout, *r, *c;
870   PetscScalar       *x, *tmp = a->solve_work, sum;
871   const PetscScalar *b, *aa, *v;
872   PetscBool          isdense;
873 
874   PetscFunctionBegin;
875   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
876   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
877   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
878   if (X != B) {
879     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
880     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
881   }
882   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
883   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
884   PetscCall(MatDenseGetArrayRead(B, &b));
885   PetscCall(MatDenseGetLDA(B, &ldb));
886   PetscCall(MatDenseGetArray(X, &x));
887   PetscCall(MatDenseGetLDA(X, &ldx));
888   PetscCall(ISGetIndices(isrow, &rout));
889   r = rout;
890   PetscCall(ISGetIndices(iscol, &cout));
891   c = cout;
892   for (neq = 0; neq < B->cmap->n; neq++) {
893     /* forward solve the lower triangular */
894     tmp[0] = b[r[0]];
895     v      = aa;
896     vi     = aj;
897     for (i = 1; i < n; i++) {
898       nz  = ai[i + 1] - ai[i];
899       sum = b[r[i]];
900       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
901       tmp[i] = sum;
902       v += nz;
903       vi += nz;
904     }
905     /* backward solve the upper triangular */
906     for (i = n - 1; i >= 0; i--) {
907       v   = aa + adiag[i + 1] + 1;
908       vi  = aj + adiag[i + 1] + 1;
909       nz  = adiag[i] - adiag[i + 1] - 1;
910       sum = tmp[i];
911       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
912       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
913     }
914     b += ldb;
915     x += ldx;
916   }
917   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
918   PetscCall(ISRestoreIndices(isrow, &rout));
919   PetscCall(ISRestoreIndices(iscol, &cout));
920   PetscCall(MatDenseRestoreArrayRead(B, &b));
921   PetscCall(MatDenseRestoreArray(X, &x));
922   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
923   PetscFunctionReturn(PETSC_SUCCESS);
924 }
925 
926 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
927 {
928   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
929   IS                 iscol = a->col, isrow = a->row;
930   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
931   const PetscInt    *adiag = a->diag;
932   PetscInt           nz, neq, ldb, ldx;
933   const PetscInt    *rout, *cout, *r, *c;
934   PetscScalar       *x, *tmp = a->solve_work, s1;
935   const PetscScalar *b, *aa, *v;
936   PetscBool          isdense;
937 
938   PetscFunctionBegin;
939   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
940   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
941   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
942   if (X != B) {
943     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
944     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
945   }
946   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
947   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
948   PetscCall(MatDenseGetArrayRead(B, &b));
949   PetscCall(MatDenseGetLDA(B, &ldb));
950   PetscCall(MatDenseGetArray(X, &x));
951   PetscCall(MatDenseGetLDA(X, &ldx));
952   PetscCall(ISGetIndices(isrow, &rout));
953   r = rout;
954   PetscCall(ISGetIndices(iscol, &cout));
955   c = cout;
956   for (neq = 0; neq < B->cmap->n; neq++) {
957     /* copy the b into temp work space according to permutation */
958     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
959 
960     /* forward solve the U^T */
961     for (i = 0; i < n; i++) {
962       v  = aa + adiag[i + 1] + 1;
963       vi = aj + adiag[i + 1] + 1;
964       nz = adiag[i] - adiag[i + 1] - 1;
965       s1 = tmp[i];
966       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
967       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
968       tmp[i] = s1;
969     }
970 
971     /* backward solve the L^T */
972     for (i = n - 1; i >= 0; i--) {
973       v  = aa + ai[i];
974       vi = aj + ai[i];
975       nz = ai[i + 1] - ai[i];
976       s1 = tmp[i];
977       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
978     }
979 
980     /* copy tmp into x according to permutation */
981     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
982     b += ldb;
983     x += ldx;
984   }
985   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
986   PetscCall(ISRestoreIndices(isrow, &rout));
987   PetscCall(ISRestoreIndices(iscol, &cout));
988   PetscCall(MatDenseRestoreArrayRead(B, &b));
989   PetscCall(MatDenseRestoreArray(X, &x));
990   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
991   PetscFunctionReturn(PETSC_SUCCESS);
992 }
993 
994 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
995 {
996   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
997   IS                 iscol = a->col, isrow = a->row;
998   const PetscInt    *r, *c, *rout, *cout, *adiag;
999   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1000   PetscInt           nz, row;
1001   PetscScalar       *x, *tmp, *tmps, sum;
1002   const PetscScalar *b;
1003   const MatScalar   *aa, *v;
1004 
1005   PetscFunctionBegin;
1006   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1007 
1008   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1009   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1010   PetscCall(VecGetArrayRead(bb, &b));
1011   PetscCall(VecGetArrayWrite(xx, &x));
1012   tmp = a->solve_work;
1013 
1014   PetscCall(ISGetIndices(isrow, &rout));
1015   r = rout;
1016   PetscCall(ISGetIndices(iscol, &cout));
1017   c = cout + (n - 1);
1018 
1019   /* forward solve the lower triangular */
1020   tmp[0] = b[*r++];
1021   tmps   = tmp;
1022   for (row = 1; row < n; row++) {
1023     i   = rout[row]; /* permuted row */
1024     v   = aa + ai[i];
1025     vi  = aj + ai[i];
1026     nz  = adiag[i] - ai[i];
1027     sum = b[*r++];
1028     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1029     tmp[row] = sum;
1030   }
1031 
1032   /* backward solve the upper triangular */
1033   for (row = n - 1; row >= 0; row--) {
1034     i   = rout[row]; /* permuted row */
1035     v   = aa + adiag[i] + 1;
1036     vi  = aj + adiag[i] + 1;
1037     nz  = ai[i + 1] - adiag[i] - 1;
1038     sum = tmp[row];
1039     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1040     x[*c--] = tmp[row] = sum * aa[adiag[i]];
1041   }
1042   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1043   PetscCall(ISRestoreIndices(isrow, &rout));
1044   PetscCall(ISRestoreIndices(iscol, &cout));
1045   PetscCall(VecRestoreArrayRead(bb, &b));
1046   PetscCall(VecRestoreArrayWrite(xx, &x));
1047   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1048   PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050 
1051 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1052 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1053 {
1054   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1055   PetscInt           n  = A->rmap->n;
1056   const PetscInt    *ai = a->i, *aj = a->j, *adiag;
1057   PetscScalar       *x;
1058   const PetscScalar *b;
1059   const MatScalar   *aa;
1060 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1061   PetscInt         adiag_i, i, nz, ai_i;
1062   const PetscInt  *vi;
1063   const MatScalar *v;
1064   PetscScalar      sum;
1065 #endif
1066 
1067   PetscFunctionBegin;
1068   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1069   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1070   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1071   PetscCall(VecGetArrayRead(bb, &b));
1072   PetscCall(VecGetArrayWrite(xx, &x));
1073 
1074 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1075   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1076 #else
1077   /* forward solve the lower triangular */
1078   x[0] = b[0];
1079   for (i = 1; i < n; i++) {
1080     ai_i = ai[i];
1081     v    = aa + ai_i;
1082     vi   = aj + ai_i;
1083     nz   = adiag[i] - ai_i;
1084     sum  = b[i];
1085     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1086     x[i] = sum;
1087   }
1088 
1089   /* backward solve the upper triangular */
1090   for (i = n - 1; i >= 0; i--) {
1091     adiag_i = adiag[i];
1092     v       = aa + adiag_i + 1;
1093     vi      = aj + adiag_i + 1;
1094     nz      = ai[i + 1] - adiag_i - 1;
1095     sum     = x[i];
1096     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1097     x[i] = sum * aa[adiag_i];
1098   }
1099 #endif
1100   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1101   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1102   PetscCall(VecRestoreArrayRead(bb, &b));
1103   PetscCall(VecRestoreArrayWrite(xx, &x));
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1108 {
1109   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1110   IS                 iscol = a->col, isrow = a->row;
1111   PetscInt           i, n                  = A->rmap->n, j;
1112   PetscInt           nz;
1113   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1114   PetscScalar       *x, *tmp, sum;
1115   const PetscScalar *b;
1116   const MatScalar   *aa, *v;
1117 
1118   PetscFunctionBegin;
1119   if (yy != xx) PetscCall(VecCopy(yy, xx));
1120 
1121   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1122   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1123   PetscCall(VecGetArrayRead(bb, &b));
1124   PetscCall(VecGetArray(xx, &x));
1125   tmp = a->solve_work;
1126 
1127   PetscCall(ISGetIndices(isrow, &rout));
1128   r = rout;
1129   PetscCall(ISGetIndices(iscol, &cout));
1130   c = cout + (n - 1);
1131 
1132   /* forward solve the lower triangular */
1133   tmp[0] = b[*r++];
1134   for (i = 1; i < n; i++) {
1135     v   = aa + ai[i];
1136     vi  = aj + ai[i];
1137     nz  = adiag[i] - ai[i];
1138     sum = b[*r++];
1139     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1140     tmp[i] = sum;
1141   }
1142 
1143   /* backward solve the upper triangular */
1144   for (i = n - 1; i >= 0; i--) {
1145     v   = aa + adiag[i] + 1;
1146     vi  = aj + adiag[i] + 1;
1147     nz  = ai[i + 1] - adiag[i] - 1;
1148     sum = tmp[i];
1149     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1150     tmp[i] = sum * aa[adiag[i]];
1151     x[*c--] += tmp[i];
1152   }
1153 
1154   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1155   PetscCall(ISRestoreIndices(isrow, &rout));
1156   PetscCall(ISRestoreIndices(iscol, &cout));
1157   PetscCall(VecRestoreArrayRead(bb, &b));
1158   PetscCall(VecRestoreArray(xx, &x));
1159   PetscCall(PetscLogFlops(2.0 * a->nz));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1164 {
1165   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1166   IS                 iscol = a->col, isrow = a->row;
1167   PetscInt           i, n                  = A->rmap->n, j;
1168   PetscInt           nz;
1169   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1170   PetscScalar       *x, *tmp, sum;
1171   const PetscScalar *b;
1172   const MatScalar   *aa, *v;
1173 
1174   PetscFunctionBegin;
1175   if (yy != xx) PetscCall(VecCopy(yy, xx));
1176 
1177   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1178   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1179   PetscCall(VecGetArrayRead(bb, &b));
1180   PetscCall(VecGetArray(xx, &x));
1181   tmp = a->solve_work;
1182 
1183   PetscCall(ISGetIndices(isrow, &rout));
1184   r = rout;
1185   PetscCall(ISGetIndices(iscol, &cout));
1186   c = cout;
1187 
1188   /* forward solve the lower triangular */
1189   tmp[0] = b[r[0]];
1190   v      = aa;
1191   vi     = aj;
1192   for (i = 1; i < n; i++) {
1193     nz  = ai[i + 1] - ai[i];
1194     sum = b[r[i]];
1195     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1196     tmp[i] = sum;
1197     v += nz;
1198     vi += nz;
1199   }
1200 
1201   /* backward solve the upper triangular */
1202   v  = aa + adiag[n - 1];
1203   vi = aj + adiag[n - 1];
1204   for (i = n - 1; i >= 0; i--) {
1205     nz  = adiag[i] - adiag[i + 1] - 1;
1206     sum = tmp[i];
1207     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1208     tmp[i] = sum * v[nz];
1209     x[c[i]] += tmp[i];
1210     v += nz + 1;
1211     vi += nz + 1;
1212   }
1213 
1214   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1215   PetscCall(ISRestoreIndices(isrow, &rout));
1216   PetscCall(ISRestoreIndices(iscol, &cout));
1217   PetscCall(VecRestoreArrayRead(bb, &b));
1218   PetscCall(VecRestoreArray(xx, &x));
1219   PetscCall(PetscLogFlops(2.0 * a->nz));
1220   PetscFunctionReturn(PETSC_SUCCESS);
1221 }
1222 
1223 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1224 {
1225   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1226   IS                 iscol = a->col, isrow = a->row;
1227   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1228   PetscInt           i, n = A->rmap->n, j;
1229   PetscInt           nz;
1230   PetscScalar       *x, *tmp, s1;
1231   const MatScalar   *aa, *v;
1232   const PetscScalar *b;
1233 
1234   PetscFunctionBegin;
1235   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1236   PetscCall(VecGetArrayRead(bb, &b));
1237   PetscCall(VecGetArrayWrite(xx, &x));
1238   tmp = a->solve_work;
1239 
1240   PetscCall(ISGetIndices(isrow, &rout));
1241   r = rout;
1242   PetscCall(ISGetIndices(iscol, &cout));
1243   c = cout;
1244 
1245   /* copy the b into temp work space according to permutation */
1246   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1247 
1248   /* forward solve the U^T */
1249   for (i = 0; i < n; i++) {
1250     v  = aa + diag[i];
1251     vi = aj + diag[i] + 1;
1252     nz = ai[i + 1] - diag[i] - 1;
1253     s1 = tmp[i];
1254     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1255     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1256     tmp[i] = s1;
1257   }
1258 
1259   /* backward solve the L^T */
1260   for (i = n - 1; i >= 0; i--) {
1261     v  = aa + diag[i] - 1;
1262     vi = aj + diag[i] - 1;
1263     nz = diag[i] - ai[i];
1264     s1 = tmp[i];
1265     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1266   }
1267 
1268   /* copy tmp into x according to permutation */
1269   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1270 
1271   PetscCall(ISRestoreIndices(isrow, &rout));
1272   PetscCall(ISRestoreIndices(iscol, &cout));
1273   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1274   PetscCall(VecRestoreArrayRead(bb, &b));
1275   PetscCall(VecRestoreArrayWrite(xx, &x));
1276 
1277   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1278   PetscFunctionReturn(PETSC_SUCCESS);
1279 }
1280 
1281 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1282 {
1283   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1284   IS                 iscol = a->col, isrow = a->row;
1285   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1286   PetscInt           i, n = A->rmap->n, j;
1287   PetscInt           nz;
1288   PetscScalar       *x, *tmp, s1;
1289   const MatScalar   *aa, *v;
1290   const PetscScalar *b;
1291 
1292   PetscFunctionBegin;
1293   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1294   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1295   PetscCall(VecGetArrayRead(bb, &b));
1296   PetscCall(VecGetArrayWrite(xx, &x));
1297   tmp = a->solve_work;
1298 
1299   PetscCall(ISGetIndices(isrow, &rout));
1300   r = rout;
1301   PetscCall(ISGetIndices(iscol, &cout));
1302   c = cout;
1303 
1304   /* copy the b into temp work space according to permutation */
1305   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1306 
1307   /* forward solve the U^T */
1308   for (i = 0; i < n; i++) {
1309     v  = aa + adiag[i + 1] + 1;
1310     vi = aj + adiag[i + 1] + 1;
1311     nz = adiag[i] - adiag[i + 1] - 1;
1312     s1 = tmp[i];
1313     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1314     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1315     tmp[i] = s1;
1316   }
1317 
1318   /* backward solve the L^T */
1319   for (i = n - 1; i >= 0; i--) {
1320     v  = aa + ai[i];
1321     vi = aj + ai[i];
1322     nz = ai[i + 1] - ai[i];
1323     s1 = tmp[i];
1324     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1325   }
1326 
1327   /* copy tmp into x according to permutation */
1328   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1329 
1330   PetscCall(ISRestoreIndices(isrow, &rout));
1331   PetscCall(ISRestoreIndices(iscol, &cout));
1332   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1333   PetscCall(VecRestoreArrayRead(bb, &b));
1334   PetscCall(VecRestoreArrayWrite(xx, &x));
1335 
1336   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1337   PetscFunctionReturn(PETSC_SUCCESS);
1338 }
1339 
1340 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1341 {
1342   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1343   IS                 iscol = a->col, isrow = a->row;
1344   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1345   PetscInt           i, n = A->rmap->n, j;
1346   PetscInt           nz;
1347   PetscScalar       *x, *tmp, s1;
1348   const MatScalar   *aa, *v;
1349   const PetscScalar *b;
1350 
1351   PetscFunctionBegin;
1352   if (zz != xx) PetscCall(VecCopy(zz, xx));
1353   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1354   PetscCall(VecGetArrayRead(bb, &b));
1355   PetscCall(VecGetArray(xx, &x));
1356   tmp = a->solve_work;
1357 
1358   PetscCall(ISGetIndices(isrow, &rout));
1359   r = rout;
1360   PetscCall(ISGetIndices(iscol, &cout));
1361   c = cout;
1362 
1363   /* copy the b into temp work space according to permutation */
1364   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1365 
1366   /* forward solve the U^T */
1367   for (i = 0; i < n; i++) {
1368     v  = aa + diag[i];
1369     vi = aj + diag[i] + 1;
1370     nz = ai[i + 1] - diag[i] - 1;
1371     s1 = tmp[i];
1372     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1373     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1374     tmp[i] = s1;
1375   }
1376 
1377   /* backward solve the L^T */
1378   for (i = n - 1; i >= 0; i--) {
1379     v  = aa + diag[i] - 1;
1380     vi = aj + diag[i] - 1;
1381     nz = diag[i] - ai[i];
1382     s1 = tmp[i];
1383     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1384   }
1385 
1386   /* copy tmp into x according to permutation */
1387   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1388 
1389   PetscCall(ISRestoreIndices(isrow, &rout));
1390   PetscCall(ISRestoreIndices(iscol, &cout));
1391   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1392   PetscCall(VecRestoreArrayRead(bb, &b));
1393   PetscCall(VecRestoreArray(xx, &x));
1394 
1395   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1400 {
1401   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1402   IS                 iscol = a->col, isrow = a->row;
1403   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1404   PetscInt           i, n = A->rmap->n, j;
1405   PetscInt           nz;
1406   PetscScalar       *x, *tmp, s1;
1407   const MatScalar   *aa, *v;
1408   const PetscScalar *b;
1409 
1410   PetscFunctionBegin;
1411   if (zz != xx) PetscCall(VecCopy(zz, xx));
1412   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1413   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1414   PetscCall(VecGetArrayRead(bb, &b));
1415   PetscCall(VecGetArray(xx, &x));
1416   tmp = a->solve_work;
1417 
1418   PetscCall(ISGetIndices(isrow, &rout));
1419   r = rout;
1420   PetscCall(ISGetIndices(iscol, &cout));
1421   c = cout;
1422 
1423   /* copy the b into temp work space according to permutation */
1424   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1425 
1426   /* forward solve the U^T */
1427   for (i = 0; i < n; i++) {
1428     v  = aa + adiag[i + 1] + 1;
1429     vi = aj + adiag[i + 1] + 1;
1430     nz = adiag[i] - adiag[i + 1] - 1;
1431     s1 = tmp[i];
1432     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1433     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1434     tmp[i] = s1;
1435   }
1436 
1437   /* backward solve the L^T */
1438   for (i = n - 1; i >= 0; i--) {
1439     v  = aa + ai[i];
1440     vi = aj + ai[i];
1441     nz = ai[i + 1] - ai[i];
1442     s1 = tmp[i];
1443     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1444   }
1445 
1446   /* copy tmp into x according to permutation */
1447   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1448 
1449   PetscCall(ISRestoreIndices(isrow, &rout));
1450   PetscCall(ISRestoreIndices(iscol, &cout));
1451   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1452   PetscCall(VecRestoreArrayRead(bb, &b));
1453   PetscCall(VecRestoreArray(xx, &x));
1454 
1455   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1456   PetscFunctionReturn(PETSC_SUCCESS);
1457 }
1458 
1459 /*
1460    ilu() under revised new data structure.
1461    Factored arrays bj and ba are stored as
1462      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1463 
1464    bi=fact->i is an array of size n+1, in which
1465      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1466      bi[n]:  points to L(n-1,n-1)+1
1467 
1468   bdiag=fact->diag is an array of size n+1,in which
1469      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1470      bdiag[n]: points to entry of U(n-1,0)-1
1471 
1472    U(i,:) contains bdiag[i] as its last entry, i.e.,
1473     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1474 */
1475 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1476 {
1477   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1478   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1479   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1480   IS             isicol;
1481 
1482   PetscFunctionBegin;
1483   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1484   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1485   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1486   b = (Mat_SeqAIJ *)fact->data;
1487 
1488   /* allocate matrix arrays for new data structure */
1489   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1490   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1491   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1492   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1493   b->free_a  = PETSC_TRUE;
1494   b->free_ij = PETSC_TRUE;
1495 
1496   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1497   bdiag = b->diag;
1498 
1499   /* set bi and bj with new data structure */
1500   bi = b->i;
1501   bj = b->j;
1502 
1503   /* L part */
1504   bi[0] = 0;
1505   for (i = 0; i < n; i++) {
1506     nz        = adiag[i] - ai[i];
1507     bi[i + 1] = bi[i] + nz;
1508     aj        = a->j + ai[i];
1509     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1510   }
1511 
1512   /* U part */
1513   bdiag[n] = bi[n] - 1;
1514   for (i = n - 1; i >= 0; i--) {
1515     nz = ai[i + 1] - adiag[i] - 1;
1516     aj = a->j + adiag[i] + 1;
1517     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1518     /* diag[i] */
1519     bj[k++]  = i;
1520     bdiag[i] = bdiag[i + 1] + nz + 1;
1521   }
1522 
1523   fact->factortype             = MAT_FACTOR_ILU;
1524   fact->info.factor_mallocs    = 0;
1525   fact->info.fill_ratio_given  = info->fill;
1526   fact->info.fill_ratio_needed = 1.0;
1527   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1528   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1529 
1530   b       = (Mat_SeqAIJ *)fact->data;
1531   b->row  = isrow;
1532   b->col  = iscol;
1533   b->icol = isicol;
1534   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1535   PetscCall(PetscObjectReference((PetscObject)isrow));
1536   PetscCall(PetscObjectReference((PetscObject)iscol));
1537   PetscFunctionReturn(PETSC_SUCCESS);
1538 }
1539 
1540 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1541 {
1542   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1543   IS                 isicol;
1544   const PetscInt    *r, *ic;
1545   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1546   PetscInt          *bi, *cols, nnz, *cols_lvl;
1547   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1548   PetscInt           i, levels, diagonal_fill;
1549   PetscBool          col_identity, row_identity;
1550   PetscReal          f;
1551   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1552   PetscBT            lnkbt;
1553   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1554   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1555   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1556   PetscBool          diagDense;
1557 
1558   PetscFunctionBegin;
1559   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1560   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1561   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
1562 
1563   levels = (PetscInt)info->levels;
1564   PetscCall(ISIdentity(isrow, &row_identity));
1565   PetscCall(ISIdentity(iscol, &col_identity));
1566   if (!levels && row_identity && col_identity) {
1567     /* special case: ilu(0) with natural ordering */
1568     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1569     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1570     PetscFunctionReturn(PETSC_SUCCESS);
1571   }
1572 
1573   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1574   PetscCall(ISGetIndices(isrow, &r));
1575   PetscCall(ISGetIndices(isicol, &ic));
1576 
1577   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1578   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1579   PetscCall(PetscMalloc1(n + 1, &bdiag));
1580   bi[0] = bdiag[0] = 0;
1581   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1582 
1583   /* create a linked list for storing column indices of the active row */
1584   nlnk = n + 1;
1585   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1586 
1587   /* initial FreeSpace size is f*(ai[n]+1) */
1588   f             = info->fill;
1589   diagonal_fill = (PetscInt)info->diagonal_fill;
1590   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1591   current_space = free_space;
1592   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1593   current_space_lvl = free_space_lvl;
1594   for (i = 0; i < n; i++) {
1595     nzi = 0;
1596     /* copy current row into linked list */
1597     nnz = ai[r[i] + 1] - ai[r[i]];
1598     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1599     cols   = aj + ai[r[i]];
1600     lnk[i] = -1; /* marker to indicate if diagonal exists */
1601     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1602     nzi += nlnk;
1603 
1604     /* make sure diagonal entry is included */
1605     if (diagonal_fill && lnk[i] == -1) {
1606       fm = n;
1607       while (lnk[fm] < i) fm = lnk[fm];
1608       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1609       lnk[fm]    = i;
1610       lnk_lvl[i] = 0;
1611       nzi++;
1612       dcount++;
1613     }
1614 
1615     /* add pivot rows into the active row */
1616     nzbd = 0;
1617     prow = lnk[n];
1618     while (prow < i) {
1619       nnz      = bdiag[prow];
1620       cols     = bj_ptr[prow] + nnz + 1;
1621       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1622       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1623       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1624       nzi += nlnk;
1625       prow = lnk[prow];
1626       nzbd++;
1627     }
1628     bdiag[i]  = nzbd;
1629     bi[i + 1] = bi[i] + nzi;
1630     /* if free space is not available, make more free space */
1631     if (current_space->local_remaining < nzi) {
1632       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1633       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1634       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1635       reallocs++;
1636     }
1637 
1638     /* copy data into free_space and free_space_lvl, then initialize lnk */
1639     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1640     bj_ptr[i]    = current_space->array;
1641     bjlvl_ptr[i] = current_space_lvl->array;
1642 
1643     /* make sure the active row i has diagonal entry */
1644     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1645 
1646     current_space->array += nzi;
1647     current_space->local_used += nzi;
1648     current_space->local_remaining -= nzi;
1649     current_space_lvl->array += nzi;
1650     current_space_lvl->local_used += nzi;
1651     current_space_lvl->local_remaining -= nzi;
1652   }
1653 
1654   PetscCall(ISRestoreIndices(isrow, &r));
1655   PetscCall(ISRestoreIndices(isicol, &ic));
1656   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1657   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1658   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1659 
1660   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1661   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1662   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1663 
1664 #if defined(PETSC_USE_INFO)
1665   {
1666     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1667     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1668     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1669     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1670     PetscCall(PetscInfo(A, "for best performance.\n"));
1671     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1672   }
1673 #endif
1674   /* put together the new matrix */
1675   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1676   b          = (Mat_SeqAIJ *)fact->data;
1677   b->free_ij = PETSC_TRUE;
1678   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1679   b->free_a = PETSC_TRUE;
1680   b->j      = bj;
1681   b->i      = bi;
1682   b->diag   = bdiag;
1683   b->ilen   = NULL;
1684   b->imax   = NULL;
1685   b->row    = isrow;
1686   b->col    = iscol;
1687   PetscCall(PetscObjectReference((PetscObject)isrow));
1688   PetscCall(PetscObjectReference((PetscObject)iscol));
1689   b->icol = isicol;
1690 
1691   PetscCall(PetscMalloc1(n, &b->solve_work));
1692   /* In b structure:  Free imax, ilen, old a, old j.
1693      Allocate bdiag, solve_work, new a, new j */
1694   b->maxnz = b->nz = bdiag[0] + 1;
1695 
1696   fact->info.factor_mallocs    = reallocs;
1697   fact->info.fill_ratio_given  = f;
1698   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1699   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1700   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1701   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1702   PetscFunctionReturn(PETSC_SUCCESS);
1703 }
1704 
1705 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1706 {
1707   Mat              C  = B;
1708   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1709   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1710   IS               ip = b->row, iip = b->icol;
1711   const PetscInt  *rip, *riip;
1712   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1713   PetscInt        *ai = a->i, *aj = a->j;
1714   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1715   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1716   PetscBool        perm_identity;
1717   FactorShiftCtx   sctx;
1718   PetscReal        rs;
1719   const MatScalar *aa, *v;
1720   MatScalar        d;
1721   const PetscInt  *adiag;
1722 
1723   PetscFunctionBegin;
1724   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1725   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1726   /* MatPivotSetUp(): initialize shift context sctx */
1727   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1728 
1729   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1730     sctx.shift_top = info->zeropivot;
1731     for (i = 0; i < mbs; i++) {
1732       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1733       d  = aa[adiag[i]];
1734       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1735       v  = aa + ai[i];
1736       nz = ai[i + 1] - ai[i];
1737       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1738       if (rs > sctx.shift_top) sctx.shift_top = rs;
1739     }
1740     sctx.shift_top *= 1.1;
1741     sctx.nshift_max = 5;
1742     sctx.shift_lo   = 0.;
1743     sctx.shift_hi   = 1.;
1744   }
1745 
1746   PetscCall(ISGetIndices(ip, &rip));
1747   PetscCall(ISGetIndices(iip, &riip));
1748 
1749   /* allocate working arrays
1750      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1751      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1752   */
1753   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1754 
1755   do {
1756     sctx.newshift = PETSC_FALSE;
1757 
1758     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1759     if (mbs) il[0] = 0;
1760 
1761     for (k = 0; k < mbs; k++) {
1762       /* zero rtmp */
1763       nz    = bi[k + 1] - bi[k];
1764       bjtmp = bj + bi[k];
1765       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1766 
1767       /* load in initial unfactored row */
1768       bval = ba + bi[k];
1769       jmin = ai[rip[k]];
1770       jmax = ai[rip[k] + 1];
1771       for (j = jmin; j < jmax; j++) {
1772         col = riip[aj[j]];
1773         if (col >= k) { /* only take upper triangular entry */
1774           rtmp[col] = aa[j];
1775           *bval++   = 0.0; /* for in-place factorization */
1776         }
1777       }
1778       /* shift the diagonal of the matrix: ZeropivotApply() */
1779       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1780 
1781       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1782       dk = rtmp[k];
1783       i  = c2r[k]; /* first row to be added to k_th row  */
1784 
1785       while (i < k) {
1786         nexti = c2r[i]; /* next row to be added to k_th row */
1787 
1788         /* compute multiplier, update diag(k) and U(i,k) */
1789         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1790         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1791         dk += uikdi * ba[ili];           /* update diag[k] */
1792         ba[ili] = uikdi;                 /* -U(i,k) */
1793 
1794         /* add multiple of row i to k-th row */
1795         jmin = ili + 1;
1796         jmax = bi[i + 1];
1797         if (jmin < jmax) {
1798           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1799           /* update il and c2r for row i */
1800           il[i]  = jmin;
1801           j      = bj[jmin];
1802           c2r[i] = c2r[j];
1803           c2r[j] = i;
1804         }
1805         i = nexti;
1806       }
1807 
1808       /* copy data into U(k,:) */
1809       rs   = 0.0;
1810       jmin = bi[k];
1811       jmax = bi[k + 1] - 1;
1812       if (jmin < jmax) {
1813         for (j = jmin; j < jmax; j++) {
1814           col   = bj[j];
1815           ba[j] = rtmp[col];
1816           rs += PetscAbsScalar(ba[j]);
1817         }
1818         /* add the k-th row into il and c2r */
1819         il[k]  = jmin;
1820         i      = bj[jmin];
1821         c2r[k] = c2r[i];
1822         c2r[i] = k;
1823       }
1824 
1825       /* MatPivotCheck() */
1826       sctx.rs = rs;
1827       sctx.pv = dk;
1828       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
1829       if (sctx.newshift) break;
1830       dk = sctx.pv;
1831 
1832       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1833     }
1834   } while (sctx.newshift);
1835 
1836   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1837   PetscCall(PetscFree3(rtmp, il, c2r));
1838   PetscCall(ISRestoreIndices(ip, &rip));
1839   PetscCall(ISRestoreIndices(iip, &riip));
1840 
1841   PetscCall(ISIdentity(ip, &perm_identity));
1842   if (perm_identity) {
1843     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1844     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1845     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1846     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1847   } else {
1848     B->ops->solve          = MatSolve_SeqSBAIJ_1;
1849     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1850     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
1851     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
1852   }
1853 
1854   C->assembled    = PETSC_TRUE;
1855   C->preallocated = PETSC_TRUE;
1856 
1857   PetscCall(PetscLogFlops(C->rmap->n));
1858 
1859   /* MatPivotView() */
1860   if (sctx.nshift) {
1861     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1862       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1863     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1864       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1865     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1866       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1867     }
1868   }
1869   PetscFunctionReturn(PETSC_SUCCESS);
1870 }
1871 
1872 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1873 {
1874   Mat              C  = B;
1875   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1876   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1877   IS               ip = b->row, iip = b->icol;
1878   const PetscInt  *rip, *riip;
1879   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
1880   PetscInt        *ai = a->i, *aj = a->j;
1881   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1882   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1883   const MatScalar *aa, *v;
1884   PetscBool        perm_identity;
1885   FactorShiftCtx   sctx;
1886   PetscReal        rs;
1887   MatScalar        d;
1888   const PetscInt  *adiag;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1892   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1893   /* MatPivotSetUp(): initialize shift context sctx */
1894   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1895 
1896   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1897     sctx.shift_top = info->zeropivot;
1898     for (i = 0; i < mbs; i++) {
1899       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1900       d  = aa[adiag[i]];
1901       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1902       v  = aa + ai[i];
1903       nz = ai[i + 1] - ai[i];
1904       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1905       if (rs > sctx.shift_top) sctx.shift_top = rs;
1906     }
1907     sctx.shift_top *= 1.1;
1908     sctx.nshift_max = 5;
1909     sctx.shift_lo   = 0.;
1910     sctx.shift_hi   = 1.;
1911   }
1912 
1913   PetscCall(ISGetIndices(ip, &rip));
1914   PetscCall(ISGetIndices(iip, &riip));
1915 
1916   /* initialization */
1917   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1918 
1919   do {
1920     sctx.newshift = PETSC_FALSE;
1921 
1922     for (i = 0; i < mbs; i++) jl[i] = mbs;
1923     il[0] = 0;
1924 
1925     for (k = 0; k < mbs; k++) {
1926       /* zero rtmp */
1927       nz    = bi[k + 1] - bi[k];
1928       bjtmp = bj + bi[k];
1929       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1930 
1931       bval = ba + bi[k];
1932       /* initialize k-th row by the perm[k]-th row of A */
1933       jmin = ai[rip[k]];
1934       jmax = ai[rip[k] + 1];
1935       for (j = jmin; j < jmax; j++) {
1936         col = riip[aj[j]];
1937         if (col >= k) { /* only take upper triangular entry */
1938           rtmp[col] = aa[j];
1939           *bval++   = 0.0; /* for in-place factorization */
1940         }
1941       }
1942       /* shift the diagonal of the matrix */
1943       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1944 
1945       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1946       dk = rtmp[k];
1947       i  = jl[k]; /* first row to be added to k_th row  */
1948 
1949       while (i < k) {
1950         nexti = jl[i]; /* next row to be added to k_th row */
1951 
1952         /* compute multiplier, update diag(k) and U(i,k) */
1953         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1954         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1955         dk += uikdi * ba[ili];
1956         ba[ili] = uikdi; /* -U(i,k) */
1957 
1958         /* add multiple of row i to k-th row */
1959         jmin = ili + 1;
1960         jmax = bi[i + 1];
1961         if (jmin < jmax) {
1962           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1963           /* update il and jl for row i */
1964           il[i] = jmin;
1965           j     = bj[jmin];
1966           jl[i] = jl[j];
1967           jl[j] = i;
1968         }
1969         i = nexti;
1970       }
1971 
1972       /* shift the diagonals when zero pivot is detected */
1973       /* compute rs=sum of abs(off-diagonal) */
1974       rs   = 0.0;
1975       jmin = bi[k] + 1;
1976       nz   = bi[k + 1] - jmin;
1977       bcol = bj + jmin;
1978       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
1979 
1980       sctx.rs = rs;
1981       sctx.pv = dk;
1982       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1983       if (sctx.newshift) break;
1984       dk = sctx.pv;
1985 
1986       /* copy data into U(k,:) */
1987       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1988       jmin      = bi[k] + 1;
1989       jmax      = bi[k + 1];
1990       if (jmin < jmax) {
1991         for (j = jmin; j < jmax; j++) {
1992           col   = bj[j];
1993           ba[j] = rtmp[col];
1994         }
1995         /* add the k-th row into il and jl */
1996         il[k] = jmin;
1997         i     = bj[jmin];
1998         jl[k] = jl[i];
1999         jl[i] = k;
2000       }
2001     }
2002   } while (sctx.newshift);
2003 
2004   PetscCall(PetscFree3(rtmp, il, jl));
2005   PetscCall(ISRestoreIndices(ip, &rip));
2006   PetscCall(ISRestoreIndices(iip, &riip));
2007   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2008 
2009   PetscCall(ISIdentity(ip, &perm_identity));
2010   if (perm_identity) {
2011     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2012     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2014     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2015   } else {
2016     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2017     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2018     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2019     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2020   }
2021 
2022   C->assembled    = PETSC_TRUE;
2023   C->preallocated = PETSC_TRUE;
2024 
2025   PetscCall(PetscLogFlops(C->rmap->n));
2026   if (sctx.nshift) {
2027     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2028       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2030       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2031     }
2032   }
2033   PetscFunctionReturn(PETSC_SUCCESS);
2034 }
2035 
2036 /*
2037    icc() under revised new data structure.
2038    Factored arrays bj and ba are stored as
2039      U(0,:),...,U(i,:),U(n-1,:)
2040 
2041    ui=fact->i is an array of size n+1, in which
2042    ui+
2043      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2044      ui[n]:  points to U(n-1,n-1)+1
2045 
2046   udiag=fact->diag is an array of size n,in which
2047      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2048 
2049    U(i,:) contains udiag[i] as its last entry, i.e.,
2050     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2051 */
2052 
2053 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2054 {
2055   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2056   Mat_SeqSBAIJ      *b;
2057   PetscBool          perm_identity;
2058   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2059   const PetscInt    *rip, *riip, *adiag;
2060   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2061   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
2062   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2063   PetscReal          fill = info->fill, levels = info->levels;
2064   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2065   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2066   PetscBT            lnkbt;
2067   IS                 iperm;
2068   PetscBool          diagDense;
2069 
2070   PetscFunctionBegin;
2071   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2072   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2073   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2074   PetscCall(ISIdentity(perm, &perm_identity));
2075   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2076 
2077   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2078   PetscCall(PetscMalloc1(am + 1, &udiag));
2079   ui[0] = 0;
2080 
2081   /* ICC(0) without matrix ordering: simply rearrange column indices */
2082   if (!levels && perm_identity) {
2083     for (i = 0; i < am; i++) {
2084       ncols     = ai[i + 1] - adiag[i];
2085       ui[i + 1] = ui[i] + ncols;
2086       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2087     }
2088     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2089     cols = uj;
2090     for (i = 0; i < am; i++) {
2091       aj    = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2092       ncols = ai[i + 1] - adiag[i] - 1;
2093       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2094       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2095     }
2096   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2097     PetscCall(ISGetIndices(iperm, &riip));
2098     PetscCall(ISGetIndices(perm, &rip));
2099 
2100     /* initialization */
2101     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2102 
2103     /* jl: linked list for storing indices of the pivot rows
2104        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2105     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2106     for (i = 0; i < am; i++) {
2107       jl[i] = am;
2108       il[i] = 0;
2109     }
2110 
2111     /* create and initialize a linked list for storing column indices of the active row k */
2112     nlnk = am + 1;
2113     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2114 
2115     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2116     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2117     current_space = free_space;
2118     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2119     current_space_lvl = free_space_lvl;
2120 
2121     for (k = 0; k < am; k++) { /* for each active row k */
2122       /* initialize lnk by the column indices of row rip[k] of A */
2123       nzk   = 0;
2124       ncols = ai[rip[k] + 1] - ai[rip[k]];
2125       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2126       ncols_upper = 0;
2127       for (j = 0; j < ncols; j++) {
2128         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2129         if (riip[i] >= k) {         /* only take upper triangular entry */
2130           ajtmp[ncols_upper] = i;
2131           ncols_upper++;
2132         }
2133       }
2134       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2135       nzk += nlnk;
2136 
2137       /* update lnk by computing fill-in for each pivot row to be merged in */
2138       prow = jl[k]; /* 1st pivot row */
2139 
2140       while (prow < k) {
2141         nextprow = jl[prow];
2142 
2143         /* merge prow into k-th row */
2144         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2145         jmax  = ui[prow + 1];
2146         ncols = jmax - jmin;
2147         i     = jmin - ui[prow];
2148         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2149         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2150         j     = *(uj - 1);
2151         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2152         nzk += nlnk;
2153 
2154         /* update il and jl for prow */
2155         if (jmin < jmax) {
2156           il[prow] = jmin;
2157           j        = *cols;
2158           jl[prow] = jl[j];
2159           jl[j]    = prow;
2160         }
2161         prow = nextprow;
2162       }
2163 
2164       /* if free space is not available, make more free space */
2165       if (current_space->local_remaining < nzk) {
2166         i = am - k + 1;                                    /* num of unfactored rows */
2167         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2168         PetscCall(PetscFreeSpaceGet(i, &current_space));
2169         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2170         reallocs++;
2171       }
2172 
2173       /* copy data into free_space and free_space_lvl, then initialize lnk */
2174       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2175       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2176 
2177       /* add the k-th row into il and jl */
2178       if (nzk > 1) {
2179         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2180         jl[k] = jl[i];
2181         jl[i] = k;
2182         il[k] = ui[k] + 1;
2183       }
2184       uj_ptr[k]     = current_space->array;
2185       uj_lvl_ptr[k] = current_space_lvl->array;
2186 
2187       current_space->array += nzk;
2188       current_space->local_used += nzk;
2189       current_space->local_remaining -= nzk;
2190 
2191       current_space_lvl->array += nzk;
2192       current_space_lvl->local_used += nzk;
2193       current_space_lvl->local_remaining -= nzk;
2194 
2195       ui[k + 1] = ui[k] + nzk;
2196     }
2197 
2198     PetscCall(ISRestoreIndices(perm, &rip));
2199     PetscCall(ISRestoreIndices(iperm, &riip));
2200     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2201     PetscCall(PetscFree(ajtmp));
2202 
2203     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2204     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2205     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2206     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2207     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2208 
2209   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2210 
2211   /* put together the new matrix in MATSEQSBAIJ format */
2212   b          = (Mat_SeqSBAIJ *)fact->data;
2213   b->free_ij = PETSC_TRUE;
2214   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2215   b->free_a = PETSC_TRUE;
2216   b->j      = uj;
2217   b->i      = ui;
2218   b->diag   = udiag;
2219   b->ilen   = NULL;
2220   b->imax   = NULL;
2221   b->row    = perm;
2222   b->col    = perm;
2223   PetscCall(PetscObjectReference((PetscObject)perm));
2224   PetscCall(PetscObjectReference((PetscObject)perm));
2225   b->icol          = iperm;
2226   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2227 
2228   PetscCall(PetscMalloc1(am, &b->solve_work));
2229 
2230   b->maxnz = b->nz = ui[am];
2231 
2232   fact->info.factor_mallocs   = reallocs;
2233   fact->info.fill_ratio_given = fill;
2234   if (ai[am] != 0) {
2235     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2236     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2237   } else {
2238     fact->info.fill_ratio_needed = 0.0;
2239   }
2240 #if defined(PETSC_USE_INFO)
2241   if (ai[am] != 0) {
2242     PetscReal af = fact->info.fill_ratio_needed;
2243     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2244     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2245     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2246   } else {
2247     PetscCall(PetscInfo(A, "Empty matrix\n"));
2248   }
2249 #endif
2250   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2251   PetscFunctionReturn(PETSC_SUCCESS);
2252 }
2253 
2254 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2255 {
2256   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2257   Mat_SeqSBAIJ      *b;
2258   PetscBool          perm_identity;
2259   PetscReal          fill = info->fill;
2260   const PetscInt    *rip, *riip;
2261   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2262   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2263   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2264   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2265   PetscBT            lnkbt;
2266   IS                 iperm;
2267   PetscBool          diagDense;
2268 
2269   PetscFunctionBegin;
2270   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2271   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2272   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2273 
2274   /* check whether perm is the identity mapping */
2275   PetscCall(ISIdentity(perm, &perm_identity));
2276   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2277   PetscCall(ISGetIndices(iperm, &riip));
2278   PetscCall(ISGetIndices(perm, &rip));
2279 
2280   /* initialization */
2281   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2282   PetscCall(PetscMalloc1(am + 1, &udiag));
2283   ui[0] = 0;
2284 
2285   /* jl: linked list for storing indices of the pivot rows
2286      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2287   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2288   for (i = 0; i < am; i++) {
2289     jl[i] = am;
2290     il[i] = 0;
2291   }
2292 
2293   /* create and initialize a linked list for storing column indices of the active row k */
2294   nlnk = am + 1;
2295   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2296 
2297   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2298   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2299   current_space = free_space;
2300 
2301   for (k = 0; k < am; k++) { /* for each active row k */
2302     /* initialize lnk by the column indices of row rip[k] of A */
2303     nzk   = 0;
2304     ncols = ai[rip[k] + 1] - ai[rip[k]];
2305     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2306     ncols_upper = 0;
2307     for (j = 0; j < ncols; j++) {
2308       i = riip[*(aj + ai[rip[k]] + j)];
2309       if (i >= k) { /* only take upper triangular entry */
2310         cols[ncols_upper] = i;
2311         ncols_upper++;
2312       }
2313     }
2314     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2315     nzk += nlnk;
2316 
2317     /* update lnk by computing fill-in for each pivot row to be merged in */
2318     prow = jl[k]; /* 1st pivot row */
2319 
2320     while (prow < k) {
2321       nextprow = jl[prow];
2322       /* merge prow into k-th row */
2323       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2324       jmax   = ui[prow + 1];
2325       ncols  = jmax - jmin;
2326       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2327       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2328       nzk += nlnk;
2329 
2330       /* update il and jl for prow */
2331       if (jmin < jmax) {
2332         il[prow] = jmin;
2333         j        = *uj_ptr;
2334         jl[prow] = jl[j];
2335         jl[j]    = prow;
2336       }
2337       prow = nextprow;
2338     }
2339 
2340     /* if free space is not available, make more free space */
2341     if (current_space->local_remaining < nzk) {
2342       i = am - k + 1;                                    /* num of unfactored rows */
2343       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2344       PetscCall(PetscFreeSpaceGet(i, &current_space));
2345       reallocs++;
2346     }
2347 
2348     /* copy data into free space, then initialize lnk */
2349     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2350 
2351     /* add the k-th row into il and jl */
2352     if (nzk > 1) {
2353       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2354       jl[k] = jl[i];
2355       jl[i] = k;
2356       il[k] = ui[k] + 1;
2357     }
2358     ui_ptr[k] = current_space->array;
2359 
2360     current_space->array += nzk;
2361     current_space->local_used += nzk;
2362     current_space->local_remaining -= nzk;
2363 
2364     ui[k + 1] = ui[k] + nzk;
2365   }
2366 
2367   PetscCall(ISRestoreIndices(perm, &rip));
2368   PetscCall(ISRestoreIndices(iperm, &riip));
2369   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2370 
2371   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2372   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2373   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2374   PetscCall(PetscLLDestroy(lnk, lnkbt));
2375 
2376   /* put together the new matrix in MATSEQSBAIJ format */
2377   b          = (Mat_SeqSBAIJ *)fact->data;
2378   b->free_ij = PETSC_TRUE;
2379   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2380   b->free_a = PETSC_TRUE;
2381   b->j      = uj;
2382   b->i      = ui;
2383   b->diag   = udiag;
2384   b->ilen   = NULL;
2385   b->imax   = NULL;
2386   b->row    = perm;
2387   b->col    = perm;
2388 
2389   PetscCall(PetscObjectReference((PetscObject)perm));
2390   PetscCall(PetscObjectReference((PetscObject)perm));
2391 
2392   b->icol          = iperm;
2393   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2394 
2395   PetscCall(PetscMalloc1(am, &b->solve_work));
2396 
2397   b->maxnz = b->nz = ui[am];
2398 
2399   fact->info.factor_mallocs   = reallocs;
2400   fact->info.fill_ratio_given = fill;
2401   if (ai[am] != 0) {
2402     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2403     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2404   } else {
2405     fact->info.fill_ratio_needed = 0.0;
2406   }
2407 #if defined(PETSC_USE_INFO)
2408   if (ai[am] != 0) {
2409     PetscReal af = fact->info.fill_ratio_needed;
2410     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2411     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2412     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2413   } else {
2414     PetscCall(PetscInfo(A, "Empty matrix\n"));
2415   }
2416 #endif
2417   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2418   PetscFunctionReturn(PETSC_SUCCESS);
2419 }
2420 
2421 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2422 {
2423   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
2424   PetscInt           n  = A->rmap->n;
2425   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2426   PetscScalar       *x, sum;
2427   const PetscScalar *b;
2428   const MatScalar   *aa, *v;
2429   PetscInt           i, nz;
2430 
2431   PetscFunctionBegin;
2432   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2433 
2434   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2435   PetscCall(VecGetArrayRead(bb, &b));
2436   PetscCall(VecGetArrayWrite(xx, &x));
2437 
2438   /* forward solve the lower triangular */
2439   x[0] = b[0];
2440   v    = aa;
2441   vi   = aj;
2442   for (i = 1; i < n; i++) {
2443     nz  = ai[i + 1] - ai[i];
2444     sum = b[i];
2445     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2446     v += nz;
2447     vi += nz;
2448     x[i] = sum;
2449   }
2450 
2451   /* backward solve the upper triangular */
2452   for (i = n - 1; i >= 0; i--) {
2453     v   = aa + adiag[i + 1] + 1;
2454     vi  = aj + adiag[i + 1] + 1;
2455     nz  = adiag[i] - adiag[i + 1] - 1;
2456     sum = x[i];
2457     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2458     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2459   }
2460 
2461   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2462   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2463   PetscCall(VecRestoreArrayRead(bb, &b));
2464   PetscCall(VecRestoreArrayWrite(xx, &x));
2465   PetscFunctionReturn(PETSC_SUCCESS);
2466 }
2467 
2468 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2469 {
2470   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
2471   IS                 iscol = a->col, isrow = a->row;
2472   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2473   const PetscInt    *rout, *cout, *r, *c;
2474   PetscScalar       *x, *tmp, sum;
2475   const PetscScalar *b;
2476   const MatScalar   *aa, *v;
2477 
2478   PetscFunctionBegin;
2479   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2480 
2481   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2482   PetscCall(VecGetArrayRead(bb, &b));
2483   PetscCall(VecGetArrayWrite(xx, &x));
2484   tmp = a->solve_work;
2485 
2486   PetscCall(ISGetIndices(isrow, &rout));
2487   r = rout;
2488   PetscCall(ISGetIndices(iscol, &cout));
2489   c = cout;
2490 
2491   /* forward solve the lower triangular */
2492   tmp[0] = b[r[0]];
2493   v      = aa;
2494   vi     = aj;
2495   for (i = 1; i < n; i++) {
2496     nz  = ai[i + 1] - ai[i];
2497     sum = b[r[i]];
2498     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2499     tmp[i] = sum;
2500     v += nz;
2501     vi += nz;
2502   }
2503 
2504   /* backward solve the upper triangular */
2505   for (i = n - 1; i >= 0; i--) {
2506     v   = aa + adiag[i + 1] + 1;
2507     vi  = aj + adiag[i + 1] + 1;
2508     nz  = adiag[i] - adiag[i + 1] - 1;
2509     sum = tmp[i];
2510     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2511     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2512   }
2513 
2514   PetscCall(ISRestoreIndices(isrow, &rout));
2515   PetscCall(ISRestoreIndices(iscol, &cout));
2516   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2517   PetscCall(VecRestoreArrayRead(bb, &b));
2518   PetscCall(VecRestoreArrayWrite(xx, &x));
2519   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2520   PetscFunctionReturn(PETSC_SUCCESS);
2521 }
2522