xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67) !
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5 
MatFactorGetSolverType_petsc(Mat A,MatSolverType * type)6 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
7 {
8   PetscFunctionBegin;
9   *type = MATSOLVERPETSC;
10   PetscFunctionReturn(PETSC_SUCCESS);
11 }
12 
MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat * B)13 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
14 {
15   PetscInt n = A->rmap->n;
16 
17   PetscFunctionBegin;
18   if (PetscDefined(USE_COMPLEX) && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
19     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
20     *B = NULL;
21     PetscFunctionReturn(PETSC_SUCCESS);
22   }
23 
24   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
25   PetscCall(MatSetSizes(*B, n, n, n, n));
26   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
27     PetscCall(MatSetType(*B, MATSEQAIJ));
28 
29     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
30     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
31 
32     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
33     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
34     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
35     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
36   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
37     PetscCall(MatSetType(*B, MATSEQSBAIJ));
38     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
39 
40     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
41     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
42     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
43     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
44   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
45   (*B)->factortype = ftype;
46 
47   PetscCall(PetscFree((*B)->solvertype));
48   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
49   (*B)->canuseordering = PETSC_TRUE;
50   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)54 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
55 {
56   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
57   IS                 isicol;
58   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
59   PetscInt           i, n = A->rmap->n;
60   PetscInt          *bi, *bj;
61   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
62   PetscReal          f;
63   PetscInt           nlnk, *lnk, k, **bi_ptr;
64   PetscFreeSpaceList free_space = NULL, current_space = NULL;
65   PetscBT            lnkbt;
66   PetscBool          diagDense;
67 
68   PetscFunctionBegin;
69   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
70   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
71   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
72 
73   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
74   PetscCall(ISGetIndices(isrow, &r));
75   PetscCall(ISGetIndices(isicol, &ic));
76 
77   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
78   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
79   PetscCall(PetscMalloc1(n + 1, &bdiag));
80   bi[0] = bdiag[0] = 0;
81 
82   /* linked list for storing column indices of the active row */
83   nlnk = n + 1;
84   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
85 
86   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
87 
88   /* initial FreeSpace size is f*(ai[n]+1) */
89   f = info->fill;
90   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
91   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
92   current_space = free_space;
93 
94   for (i = 0; i < n; i++) {
95     /* copy previous fill into linked list */
96     nzi   = 0;
97     nnz   = ai[r[i] + 1] - ai[r[i]];
98     ajtmp = aj + ai[r[i]];
99     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
100     nzi += nlnk;
101 
102     /* add pivot rows into linked list */
103     row = lnk[n];
104     while (row < i) {
105       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
106       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
107       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
108       nzi += nlnk;
109       row = lnk[row];
110     }
111     bi[i + 1] = bi[i] + nzi;
112     im[i]     = nzi;
113 
114     /* mark bdiag */
115     nzbd = 0;
116     nnz  = nzi;
117     k    = lnk[n];
118     while (nnz-- && k < i) {
119       nzbd++;
120       k = lnk[k];
121     }
122     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
123 
124     /* if free space is not available, make more free space */
125     if (current_space->local_remaining < nzi) {
126       /* estimated additional space needed */
127       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
128       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
129       reallocs++;
130     }
131 
132     /* copy data into free space, then initialize lnk */
133     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
134 
135     bi_ptr[i] = current_space->array;
136     current_space->array += nzi;
137     current_space->local_used += nzi;
138     current_space->local_remaining -= nzi;
139   }
140 
141   PetscCall(ISRestoreIndices(isrow, &r));
142   PetscCall(ISRestoreIndices(isicol, &ic));
143 
144   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
145   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
146   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
147   PetscCall(PetscLLDestroy(lnk, lnkbt));
148   PetscCall(PetscFree2(bi_ptr, im));
149 
150   /* put together the new matrix */
151   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
152   b          = (Mat_SeqAIJ *)B->data;
153   b->free_ij = PETSC_TRUE;
154   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
155   b->free_a = PETSC_TRUE;
156   b->j      = bj;
157   b->i      = bi;
158   b->diag   = bdiag;
159   b->ilen   = NULL;
160   b->imax   = NULL;
161   b->row    = isrow;
162   b->col    = iscol;
163   PetscCall(PetscObjectReference((PetscObject)isrow));
164   PetscCall(PetscObjectReference((PetscObject)iscol));
165   b->icol = isicol;
166   PetscCall(PetscMalloc1(n, &b->solve_work));
167 
168   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
169   b->maxnz = b->nz = bdiag[0] + 1;
170 
171   B->factortype            = MAT_FACTOR_LU;
172   B->info.factor_mallocs   = reallocs;
173   B->info.fill_ratio_given = f;
174 
175   if (ai[n]) {
176     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
177   } else {
178     B->info.fill_ratio_needed = 0.0;
179   }
180 #if defined(PETSC_USE_INFO)
181   if (ai[n] != 0) {
182     PetscReal af = B->info.fill_ratio_needed;
183     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
184     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
185     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
186     PetscCall(PetscInfo(A, "for best performance.\n"));
187   } else {
188     PetscCall(PetscInfo(A, "Empty matrix\n"));
189   }
190 #endif
191   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
192   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
193   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
194   PetscFunctionReturn(PETSC_SUCCESS);
195 }
196 
197 /*
198     Trouble in factorization, should we dump the original matrix?
199 */
MatFactorDumpMatrix(Mat A)200 PetscErrorCode MatFactorDumpMatrix(Mat A)
201 {
202   PetscBool flg = PETSC_FALSE;
203 
204   PetscFunctionBegin;
205   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
206   if (flg) {
207     PetscViewer viewer;
208     char        filename[PETSC_MAX_PATH_LEN];
209 
210     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
211     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
212     PetscCall(MatView(A, viewer));
213     PetscCall(PetscViewerDestroy(&viewer));
214   }
215   PetscFunctionReturn(PETSC_SUCCESS);
216 }
217 
MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo * info)218 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
219 {
220   Mat              C = B;
221   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
222   IS               isrow = b->row, isicol = b->icol;
223   const PetscInt  *r, *ic, *ics;
224   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
225   PetscInt         i, j, k, nz, nzL, row, *pj;
226   const PetscInt  *ajtmp, *bjtmp;
227   MatScalar       *rtmp, *pc, multiplier, *pv;
228   const MatScalar *aa, *v;
229   MatScalar       *ba;
230   PetscBool        row_identity, col_identity;
231   FactorShiftCtx   sctx;
232   const PetscInt  *ddiag;
233   PetscReal        rs;
234   MatScalar        d;
235 
236   PetscFunctionBegin;
237   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
238   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
239   /* MatPivotSetUp(): initialize shift context sctx */
240   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
241 
242   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
243     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
244     sctx.shift_top = info->zeropivot;
245     for (i = 0; i < n; i++) {
246       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
247       d  = aa[ddiag[i]];
248       rs = -PetscAbsScalar(d) - PetscRealPart(d);
249       v  = aa + ai[i];
250       nz = ai[i + 1] - ai[i];
251       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
252       if (rs > sctx.shift_top) sctx.shift_top = rs;
253     }
254     sctx.shift_top *= 1.1;
255     sctx.nshift_max = 5;
256     sctx.shift_lo   = 0.;
257     sctx.shift_hi   = 1.;
258   }
259 
260   PetscCall(ISGetIndices(isrow, &r));
261   PetscCall(ISGetIndices(isicol, &ic));
262   PetscCall(PetscMalloc1(n + 1, &rtmp));
263   ics = ic;
264 
265   do {
266     sctx.newshift = PETSC_FALSE;
267     for (i = 0; i < n; i++) {
268       /* zero rtmp */
269       /* L part */
270       nz    = bi[i + 1] - bi[i];
271       bjtmp = bj + bi[i];
272       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
273 
274       /* U part */
275       nz    = bdiag[i] - bdiag[i + 1];
276       bjtmp = bj + bdiag[i + 1] + 1;
277       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
278 
279       /* load in initial (unfactored row) */
280       nz    = ai[r[i] + 1] - ai[r[i]];
281       ajtmp = aj + ai[r[i]];
282       v     = aa + ai[r[i]];
283       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
284       /* ZeropivotApply() */
285       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
286 
287       /* elimination */
288       bjtmp = bj + bi[i];
289       row   = *bjtmp++;
290       nzL   = bi[i + 1] - bi[i];
291       for (k = 0; k < nzL; k++) {
292         pc = rtmp + row;
293         if (*pc != 0.0) {
294           pv         = ba + bdiag[row];
295           multiplier = *pc * (*pv);
296           *pc        = multiplier;
297 
298           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
299           pv = ba + bdiag[row + 1] + 1;
300           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
301 
302           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
303           PetscCall(PetscLogFlops(1 + 2.0 * nz));
304         }
305         row = *bjtmp++;
306       }
307 
308       /* finished row so stick it into b->a */
309       rs = 0.0;
310       /* L part */
311       pv = ba + bi[i];
312       pj = b->j + bi[i];
313       nz = bi[i + 1] - bi[i];
314       for (j = 0; j < nz; j++) {
315         pv[j] = rtmp[pj[j]];
316         rs += PetscAbsScalar(pv[j]);
317       }
318 
319       /* U part */
320       pv = ba + bdiag[i + 1] + 1;
321       pj = b->j + bdiag[i + 1] + 1;
322       nz = bdiag[i] - bdiag[i + 1] - 1;
323       for (j = 0; j < nz; j++) {
324         pv[j] = rtmp[pj[j]];
325         rs += PetscAbsScalar(pv[j]);
326       }
327 
328       sctx.rs = rs;
329       sctx.pv = rtmp[i];
330       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
331       if (sctx.newshift) break; /* break for-loop */
332       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
333 
334       /* Mark diagonal and invert diagonal for simpler triangular solves */
335       pv  = ba + bdiag[i];
336       *pv = 1.0 / rtmp[i];
337 
338     } /* endof for (i=0; i<n; i++) { */
339 
340     /* MatPivotRefine() */
341     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
342       /*
343        * if no shift in this attempt & shifting & started shifting & can refine,
344        * then try lower shift
345        */
346       sctx.shift_hi       = sctx.shift_fraction;
347       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
348       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
349       sctx.newshift       = PETSC_TRUE;
350       sctx.nshift++;
351     }
352   } while (sctx.newshift);
353 
354   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
355   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
356 
357   PetscCall(PetscFree(rtmp));
358   PetscCall(ISRestoreIndices(isicol, &ic));
359   PetscCall(ISRestoreIndices(isrow, &r));
360 
361   PetscCall(ISIdentity(isrow, &row_identity));
362   PetscCall(ISIdentity(isicol, &col_identity));
363   if (b->inode.size_csr) {
364     C->ops->solve = MatSolve_SeqAIJ_Inode;
365   } else if (row_identity && col_identity) {
366     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
367   } else {
368     C->ops->solve = MatSolve_SeqAIJ;
369   }
370   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
371   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
372   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
373   C->ops->matsolve          = MatMatSolve_SeqAIJ;
374   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
375   C->assembled              = PETSC_TRUE;
376   C->preallocated           = PETSC_TRUE;
377 
378   PetscCall(PetscLogFlops(C->cmap->n));
379 
380   /* MatShiftView(A,info,&sctx) */
381   if (sctx.nshift) {
382     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
383       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
384     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
385       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
386     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
387       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
388     }
389   }
390   PetscFunctionReturn(PETSC_SUCCESS);
391 }
392 
393 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
394 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
395 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
396 
MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo * info)397 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
398 {
399   Mat              C = B;
400   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
401   IS               isrow = b->row, isicol = b->icol;
402   const PetscInt  *r, *ic, *ics;
403   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
404   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
405   const PetscInt  *ajtmp, *bjtmp, *ddiag, *pj;
406   MatScalar       *pv, *rtmp, *pc, multiplier, d;
407   const MatScalar *v, *aa;
408   MatScalar       *ba;
409   PetscReal        rs = 0.0;
410   FactorShiftCtx   sctx;
411   PetscBool        row_identity, col_identity;
412 
413   PetscFunctionBegin;
414   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
415 
416   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
417   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
418   /* MatPivotSetUp(): initialize shift context sctx */
419   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
420 
421   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
422     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
423     sctx.shift_top = info->zeropivot;
424     for (i = 0; i < n; i++) {
425       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
426       d  = aa[ddiag[i]];
427       rs = -PetscAbsScalar(d) - PetscRealPart(d);
428       v  = aa + ai[i];
429       nz = ai[i + 1] - ai[i];
430       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
431       if (rs > sctx.shift_top) sctx.shift_top = rs;
432     }
433     sctx.shift_top *= 1.1;
434     sctx.nshift_max = 5;
435     sctx.shift_lo   = 0.;
436     sctx.shift_hi   = 1.;
437   }
438 
439   PetscCall(ISGetIndices(isrow, &r));
440   PetscCall(ISGetIndices(isicol, &ic));
441   PetscCall(PetscMalloc1(n + 1, &rtmp));
442   ics = ic;
443 
444   do {
445     sctx.newshift = PETSC_FALSE;
446     for (i = 0; i < n; i++) {
447       nz    = bi[i + 1] - bi[i];
448       bjtmp = bj + bi[i];
449       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
450 
451       /* load in initial (unfactored row) */
452       nz    = ai[r[i] + 1] - ai[r[i]];
453       ajtmp = aj + ai[r[i]];
454       v     = aa + ai[r[i]];
455       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
456       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
457 
458       row = *bjtmp++;
459       while (row < i) {
460         pc = rtmp + row;
461         if (*pc != 0.0) {
462           pv         = ba + ddiag[row];
463           pj         = b->j + ddiag[row] + 1;
464           multiplier = *pc / *pv++;
465           *pc        = multiplier;
466           nz         = bi[row + 1] - ddiag[row] - 1;
467           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
468           PetscCall(PetscLogFlops(1 + 2.0 * nz));
469         }
470         row = *bjtmp++;
471       }
472       /* finished row so stick it into b->a */
473       pv   = ba + bi[i];
474       pj   = b->j + bi[i];
475       nz   = bi[i + 1] - bi[i];
476       diag = ddiag[i] - bi[i];
477       rs   = 0.0;
478       for (j = 0; j < nz; j++) {
479         pv[j] = rtmp[pj[j]];
480         rs += PetscAbsScalar(pv[j]);
481       }
482       rs -= PetscAbsScalar(pv[diag]);
483 
484       sctx.rs = rs;
485       sctx.pv = pv[diag];
486       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
487       if (sctx.newshift) break;
488       pv[diag] = sctx.pv;
489     }
490 
491     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
492       /*
493        * if no shift in this attempt & shifting & started shifting & can refine,
494        * then try lower shift
495        */
496       sctx.shift_hi       = sctx.shift_fraction;
497       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
498       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
499       sctx.newshift       = PETSC_TRUE;
500       sctx.nshift++;
501     }
502   } while (sctx.newshift);
503 
504   /* invert diagonal entries for simpler triangular solves */
505   for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
506   PetscCall(PetscFree(rtmp));
507   PetscCall(ISRestoreIndices(isicol, &ic));
508   PetscCall(ISRestoreIndices(isrow, &r));
509   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
510   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
511 
512   PetscCall(ISIdentity(isrow, &row_identity));
513   PetscCall(ISIdentity(isicol, &col_identity));
514   if (row_identity && col_identity) {
515     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
516   } else {
517     C->ops->solve = MatSolve_SeqAIJ_inplace;
518   }
519   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
520   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
521   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
522   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
523   C->ops->matsolvetranspose = NULL;
524 
525   C->assembled    = PETSC_TRUE;
526   C->preallocated = PETSC_TRUE;
527 
528   PetscCall(PetscLogFlops(C->cmap->n));
529   if (sctx.nshift) {
530     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
531       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
532     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
533       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
534     }
535   }
536   C->ops->solve          = MatSolve_SeqAIJ_inplace;
537   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
538 
539   PetscCall(MatSeqAIJCheckInode(C));
540   PetscFunctionReturn(PETSC_SUCCESS);
541 }
542 
543 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
544 
545 /*
546    This routine implements inplace ILU(0) with row or/and column permutations.
547    Input:
548      A - original matrix
549    Output;
550      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
551          a->j (col index) is permuted by the inverse of colperm, then sorted
552          a->a reordered accordingly with a->j
553          a->diag (ptr to diagonal elements) is updated.
554 */
MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo * info)555 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
556 {
557   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
558   IS              isrow = a->row, isicol = a->icol;
559   const PetscInt *r, *ic, *ics;
560   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
561   PetscInt       *ajtmp, nz, row;
562   PetscInt        nbdiag, *pj;
563   PetscScalar    *rtmp, *pc, multiplier, d;
564   MatScalar      *pv, *v;
565   PetscReal       rs;
566   FactorShiftCtx  sctx;
567   MatScalar      *aa, *vtmp;
568   PetscInt       *diag;
569 
570   PetscFunctionBegin;
571   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
572 
573   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
574   PetscCall(MatSeqAIJGetArray(A, &aa));
575   /* MatPivotSetUp(): initialize shift context sctx */
576   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
577 
578   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
579     const PetscInt *ddiag;
580 
581     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
582     sctx.shift_top = info->zeropivot;
583     for (i = 0; i < n; i++) {
584       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
585       d    = aa[ddiag[i]];
586       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
587       vtmp = aa + ai[i];
588       nz   = ai[i + 1] - ai[i];
589       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
590       if (rs > sctx.shift_top) sctx.shift_top = rs;
591     }
592     sctx.shift_top *= 1.1;
593     sctx.nshift_max = 5;
594     sctx.shift_lo   = 0.;
595     sctx.shift_hi   = 1.;
596   }
597 
598   PetscCall(ISGetIndices(isrow, &r));
599   PetscCall(ISGetIndices(isicol, &ic));
600   PetscCall(PetscMalloc1(n + 1, &rtmp));
601   PetscCall(PetscArrayzero(rtmp, n + 1));
602   ics = ic;
603 
604 #if defined(MV)
605   sctx.shift_top      = 0.;
606   sctx.nshift_max     = 0;
607   sctx.shift_lo       = 0.;
608   sctx.shift_hi       = 0.;
609   sctx.shift_fraction = 0.;
610 
611   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
612     sctx.shift_top = 0.;
613     for (i = 0; i < n; i++) {
614       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
615       d  = aa[diag[i]];
616       rs = -PetscAbsScalar(d) - PetscRealPart(d);
617       v  = aa + ai[i];
618       nz = ai[i + 1] - ai[i];
619       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
620       if (rs > sctx.shift_top) sctx.shift_top = rs;
621     }
622     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
623     sctx.shift_top *= 1.1;
624     sctx.nshift_max = 5;
625     sctx.shift_lo   = 0.;
626     sctx.shift_hi   = 1.;
627   }
628 
629   sctx.shift_amount = 0.;
630   sctx.nshift       = 0;
631 #endif
632 
633   do {
634     sctx.newshift = PETSC_FALSE;
635     for (i = 0; i < n; i++) {
636       /* load in initial unfactored row */
637       nz    = ai[r[i] + 1] - ai[r[i]];
638       ajtmp = aj + ai[r[i]];
639       v     = aa + ai[r[i]];
640       /* sort permuted ajtmp and values v accordingly */
641       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
642       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
643 
644       diag[r[i]] = ai[r[i]];
645       for (j = 0; j < nz; j++) {
646         rtmp[ajtmp[j]] = v[j];
647         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
648       }
649       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
650 
651       row = *ajtmp++;
652       while (row < i) {
653         pc = rtmp + row;
654         if (*pc != 0.0) {
655           pv = aa + diag[r[row]];
656           pj = aj + diag[r[row]] + 1;
657 
658           multiplier = *pc / *pv++;
659           *pc        = multiplier;
660           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
661           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
662           PetscCall(PetscLogFlops(1 + 2.0 * nz));
663         }
664         row = *ajtmp++;
665       }
666       /* finished row so overwrite it onto aa */
667       pv     = aa + ai[r[i]];
668       pj     = aj + ai[r[i]];
669       nz     = ai[r[i] + 1] - ai[r[i]];
670       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
671 
672       rs = 0.0;
673       for (j = 0; j < nz; j++) {
674         pv[j] = rtmp[pj[j]];
675         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
676       }
677 
678       sctx.rs = rs;
679       sctx.pv = pv[nbdiag];
680       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
681       if (sctx.newshift) break;
682       pv[nbdiag] = sctx.pv;
683     }
684 
685     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
686       /*
687        * if no shift in this attempt & shifting & started shifting & can refine,
688        * then try lower shift
689        */
690       sctx.shift_hi       = sctx.shift_fraction;
691       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
692       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
693       sctx.newshift       = PETSC_TRUE;
694       sctx.nshift++;
695     }
696   } while (sctx.newshift);
697 
698   /* invert diagonal entries for simpler triangular solves */
699   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
700 
701   PetscCall(MatSeqAIJRestoreArray(A, &aa));
702   PetscCall(PetscFree(rtmp));
703   PetscCall(ISRestoreIndices(isicol, &ic));
704   PetscCall(ISRestoreIndices(isrow, &r));
705 
706   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
707   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
708   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
709   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
710 
711   A->assembled    = PETSC_TRUE;
712   A->preallocated = PETSC_TRUE;
713 
714   PetscCall(PetscLogFlops(A->cmap->n));
715   if (sctx.nshift) {
716     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
717       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
718     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
719       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
720     }
721   }
722   PetscFunctionReturn(PETSC_SUCCESS);
723 }
724 
MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo * info)725 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
726 {
727   Mat C;
728 
729   PetscFunctionBegin;
730   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
731   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
732   PetscCall(MatLUFactorNumeric(C, A, info));
733 
734   A->ops->solve          = C->ops->solve;
735   A->ops->solvetranspose = C->ops->solvetranspose;
736 
737   PetscCall(MatHeaderMerge(A, &C));
738   PetscFunctionReturn(PETSC_SUCCESS);
739 }
740 
MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)741 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
742 {
743   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
744   IS                 iscol = a->col, isrow = a->row;
745   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
746   PetscInt           nz;
747   const PetscInt    *rout, *cout, *r, *c;
748   PetscScalar       *x, *tmp, *tmps, sum;
749   const PetscScalar *b;
750   const MatScalar   *aa, *v;
751   const PetscInt    *adiag;
752 
753   PetscFunctionBegin;
754   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
755 
756   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
757   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
758   PetscCall(VecGetArrayRead(bb, &b));
759   PetscCall(VecGetArrayWrite(xx, &x));
760   tmp = a->solve_work;
761 
762   PetscCall(ISGetIndices(isrow, &rout));
763   r = rout;
764   PetscCall(ISGetIndices(iscol, &cout));
765   c = cout + (n - 1);
766 
767   /* forward solve the lower triangular */
768   tmp[0] = b[*r++];
769   tmps   = tmp;
770   for (i = 1; i < n; i++) {
771     v   = aa + ai[i];
772     vi  = aj + ai[i];
773     nz  = adiag[i] - ai[i];
774     sum = b[*r++];
775     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
776     tmp[i] = sum;
777   }
778 
779   /* backward solve the upper triangular */
780   for (i = n - 1; i >= 0; i--) {
781     v   = aa + adiag[i] + 1;
782     vi  = aj + adiag[i] + 1;
783     nz  = ai[i + 1] - adiag[i] - 1;
784     sum = tmp[i];
785     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
786     x[*c--] = tmp[i] = sum * aa[adiag[i]];
787   }
788   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
789   PetscCall(ISRestoreIndices(isrow, &rout));
790   PetscCall(ISRestoreIndices(iscol, &cout));
791   PetscCall(VecRestoreArrayRead(bb, &b));
792   PetscCall(VecRestoreArrayWrite(xx, &x));
793   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
794   PetscFunctionReturn(PETSC_SUCCESS);
795 }
796 
MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)797 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
798 {
799   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
800   IS                 iscol = a->col, isrow = a->row;
801   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
802   PetscInt           nz, neq, ldb, ldx;
803   const PetscInt    *rout, *cout, *r, *c;
804   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
805   const PetscScalar *b, *aa, *v;
806   PetscBool          isdense;
807   const PetscInt    *adiag;
808 
809   PetscFunctionBegin;
810   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
811   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
812   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
813   if (X != B) {
814     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
815     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
816   }
817   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
818   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
819   PetscCall(MatDenseGetArrayRead(B, &b));
820   PetscCall(MatDenseGetLDA(B, &ldb));
821   PetscCall(MatDenseGetArray(X, &x));
822   PetscCall(MatDenseGetLDA(X, &ldx));
823   PetscCall(ISGetIndices(isrow, &rout));
824   r = rout;
825   PetscCall(ISGetIndices(iscol, &cout));
826   c = cout;
827   for (neq = 0; neq < B->cmap->n; neq++) {
828     /* forward solve the lower triangular */
829     tmp[0] = b[r[0]];
830     tmps   = tmp;
831     for (i = 1; i < n; i++) {
832       v   = aa + ai[i];
833       vi  = aj + ai[i];
834       nz  = adiag[i] - ai[i];
835       sum = b[r[i]];
836       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
837       tmp[i] = sum;
838     }
839     /* backward solve the upper triangular */
840     for (i = n - 1; i >= 0; i--) {
841       v   = aa + adiag[i] + 1;
842       vi  = aj + adiag[i] + 1;
843       nz  = ai[i + 1] - adiag[i] - 1;
844       sum = tmp[i];
845       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
846       x[c[i]] = tmp[i] = sum * aa[adiag[i]];
847     }
848     b += ldb;
849     x += ldx;
850   }
851   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
852   PetscCall(ISRestoreIndices(isrow, &rout));
853   PetscCall(ISRestoreIndices(iscol, &cout));
854   PetscCall(MatDenseRestoreArrayRead(B, &b));
855   PetscCall(MatDenseRestoreArray(X, &x));
856   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)860 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
861 {
862   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
863   IS                 iscol = a->col, isrow = a->row;
864   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
865   const PetscInt    *adiag;
866   PetscInt           nz, neq, ldb, ldx;
867   const PetscInt    *rout, *cout, *r, *c;
868   PetscScalar       *x, *tmp = a->solve_work, sum;
869   const PetscScalar *b, *aa, *v;
870   PetscBool          isdense;
871 
872   PetscFunctionBegin;
873   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
874   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
875   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
876   if (X != B) {
877     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
878     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
879   }
880   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
881   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
882   PetscCall(MatDenseGetArrayRead(B, &b));
883   PetscCall(MatDenseGetLDA(B, &ldb));
884   PetscCall(MatDenseGetArray(X, &x));
885   PetscCall(MatDenseGetLDA(X, &ldx));
886   PetscCall(ISGetIndices(isrow, &rout));
887   r = rout;
888   PetscCall(ISGetIndices(iscol, &cout));
889   c = cout;
890   for (neq = 0; neq < B->cmap->n; neq++) {
891     /* forward solve the lower triangular */
892     tmp[0] = b[r[0]];
893     v      = aa;
894     vi     = aj;
895     for (i = 1; i < n; i++) {
896       nz  = ai[i + 1] - ai[i];
897       sum = b[r[i]];
898       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
899       tmp[i] = sum;
900       v += nz;
901       vi += nz;
902     }
903     /* backward solve the upper triangular */
904     for (i = n - 1; i >= 0; i--) {
905       v   = aa + adiag[i + 1] + 1;
906       vi  = aj + adiag[i + 1] + 1;
907       nz  = adiag[i] - adiag[i + 1] - 1;
908       sum = tmp[i];
909       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
910       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
911     }
912     b += ldb;
913     x += ldx;
914   }
915   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
916   PetscCall(ISRestoreIndices(isrow, &rout));
917   PetscCall(ISRestoreIndices(iscol, &cout));
918   PetscCall(MatDenseRestoreArrayRead(B, &b));
919   PetscCall(MatDenseRestoreArray(X, &x));
920   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
MatMatSolveTranspose_SeqAIJ(Mat A,Mat B,Mat X)924 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
925 {
926   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
927   IS                 iscol = a->col, isrow = a->row;
928   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
929   const PetscInt    *adiag = a->diag;
930   PetscInt           nz, neq, ldb, ldx;
931   const PetscInt    *rout, *cout, *r, *c;
932   PetscScalar       *x, *tmp = a->solve_work, s1;
933   const PetscScalar *b, *aa, *v;
934   PetscBool          isdense;
935 
936   PetscFunctionBegin;
937   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
938   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
939   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
940   if (X != B) {
941     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
942     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
943   }
944   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
945   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
946   PetscCall(MatDenseGetArrayRead(B, &b));
947   PetscCall(MatDenseGetLDA(B, &ldb));
948   PetscCall(MatDenseGetArray(X, &x));
949   PetscCall(MatDenseGetLDA(X, &ldx));
950   PetscCall(ISGetIndices(isrow, &rout));
951   r = rout;
952   PetscCall(ISGetIndices(iscol, &cout));
953   c = cout;
954   for (neq = 0; neq < B->cmap->n; neq++) {
955     /* copy the b into temp work space according to permutation */
956     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
957 
958     /* forward solve the U^T */
959     for (i = 0; i < n; i++) {
960       v  = aa + adiag[i + 1] + 1;
961       vi = aj + adiag[i + 1] + 1;
962       nz = adiag[i] - adiag[i + 1] - 1;
963       s1 = tmp[i];
964       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
965       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
966       tmp[i] = s1;
967     }
968 
969     /* backward solve the L^T */
970     for (i = n - 1; i >= 0; i--) {
971       v  = aa + ai[i];
972       vi = aj + ai[i];
973       nz = ai[i + 1] - ai[i];
974       s1 = tmp[i];
975       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
976     }
977 
978     /* copy tmp into x according to permutation */
979     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
980     b += ldb;
981     x += ldx;
982   }
983   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
984   PetscCall(ISRestoreIndices(isrow, &rout));
985   PetscCall(ISRestoreIndices(iscol, &cout));
986   PetscCall(MatDenseRestoreArrayRead(B, &b));
987   PetscCall(MatDenseRestoreArray(X, &x));
988   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
989   PetscFunctionReturn(PETSC_SUCCESS);
990 }
991 
MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)992 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
993 {
994   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
995   IS                 iscol = a->col, isrow = a->row;
996   const PetscInt    *r, *c, *rout, *cout, *adiag;
997   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
998   PetscInt           nz, row;
999   PetscScalar       *x, *tmp, *tmps, sum;
1000   const PetscScalar *b;
1001   const MatScalar   *aa, *v;
1002 
1003   PetscFunctionBegin;
1004   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1005 
1006   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1007   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1008   PetscCall(VecGetArrayRead(bb, &b));
1009   PetscCall(VecGetArrayWrite(xx, &x));
1010   tmp = a->solve_work;
1011 
1012   PetscCall(ISGetIndices(isrow, &rout));
1013   r = rout;
1014   PetscCall(ISGetIndices(iscol, &cout));
1015   c = cout + (n - 1);
1016 
1017   /* forward solve the lower triangular */
1018   tmp[0] = b[*r++];
1019   tmps   = tmp;
1020   for (row = 1; row < n; row++) {
1021     i   = rout[row]; /* permuted row */
1022     v   = aa + ai[i];
1023     vi  = aj + ai[i];
1024     nz  = adiag[i] - ai[i];
1025     sum = b[*r++];
1026     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1027     tmp[row] = sum;
1028   }
1029 
1030   /* backward solve the upper triangular */
1031   for (row = n - 1; row >= 0; row--) {
1032     i   = rout[row]; /* permuted row */
1033     v   = aa + adiag[i] + 1;
1034     vi  = aj + adiag[i] + 1;
1035     nz  = ai[i + 1] - adiag[i] - 1;
1036     sum = tmp[row];
1037     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1038     x[*c--] = tmp[row] = sum * aa[adiag[i]];
1039   }
1040   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1041   PetscCall(ISRestoreIndices(isrow, &rout));
1042   PetscCall(ISRestoreIndices(iscol, &cout));
1043   PetscCall(VecRestoreArrayRead(bb, &b));
1044   PetscCall(VecRestoreArrayWrite(xx, &x));
1045   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1046   PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048 
1049 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1050 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1051 {
1052   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1053   PetscInt           n  = A->rmap->n;
1054   const PetscInt    *ai = a->i, *aj = a->j, *adiag;
1055   PetscScalar       *x;
1056   const PetscScalar *b;
1057   const MatScalar   *aa;
1058 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1059   PetscInt         adiag_i, i, nz, ai_i;
1060   const PetscInt  *vi;
1061   const MatScalar *v;
1062   PetscScalar      sum;
1063 #endif
1064 
1065   PetscFunctionBegin;
1066   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1067   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1068   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1069   PetscCall(VecGetArrayRead(bb, &b));
1070   PetscCall(VecGetArrayWrite(xx, &x));
1071 
1072 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1073   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1074 #else
1075   /* forward solve the lower triangular */
1076   x[0] = b[0];
1077   for (i = 1; i < n; i++) {
1078     ai_i = ai[i];
1079     v    = aa + ai_i;
1080     vi   = aj + ai_i;
1081     nz   = adiag[i] - ai_i;
1082     sum  = b[i];
1083     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1084     x[i] = sum;
1085   }
1086 
1087   /* backward solve the upper triangular */
1088   for (i = n - 1; i >= 0; i--) {
1089     adiag_i = adiag[i];
1090     v       = aa + adiag_i + 1;
1091     vi      = aj + adiag_i + 1;
1092     nz      = ai[i + 1] - adiag_i - 1;
1093     sum     = x[i];
1094     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1095     x[i] = sum * aa[adiag_i];
1096   }
1097 #endif
1098   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1099   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1100   PetscCall(VecRestoreArrayRead(bb, &b));
1101   PetscCall(VecRestoreArrayWrite(xx, &x));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)1105 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1106 {
1107   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1108   IS                 iscol = a->col, isrow = a->row;
1109   PetscInt           i, n                  = A->rmap->n, j;
1110   PetscInt           nz;
1111   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1112   PetscScalar       *x, *tmp, sum;
1113   const PetscScalar *b;
1114   const MatScalar   *aa, *v;
1115 
1116   PetscFunctionBegin;
1117   if (yy != xx) PetscCall(VecCopy(yy, xx));
1118 
1119   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1120   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1121   PetscCall(VecGetArrayRead(bb, &b));
1122   PetscCall(VecGetArray(xx, &x));
1123   tmp = a->solve_work;
1124 
1125   PetscCall(ISGetIndices(isrow, &rout));
1126   r = rout;
1127   PetscCall(ISGetIndices(iscol, &cout));
1128   c = cout + (n - 1);
1129 
1130   /* forward solve the lower triangular */
1131   tmp[0] = b[*r++];
1132   for (i = 1; i < n; i++) {
1133     v   = aa + ai[i];
1134     vi  = aj + ai[i];
1135     nz  = adiag[i] - ai[i];
1136     sum = b[*r++];
1137     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1138     tmp[i] = sum;
1139   }
1140 
1141   /* backward solve the upper triangular */
1142   for (i = n - 1; i >= 0; i--) {
1143     v   = aa + adiag[i] + 1;
1144     vi  = aj + adiag[i] + 1;
1145     nz  = ai[i + 1] - adiag[i] - 1;
1146     sum = tmp[i];
1147     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1148     tmp[i] = sum * aa[adiag[i]];
1149     x[*c--] += tmp[i];
1150   }
1151 
1152   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1153   PetscCall(ISRestoreIndices(isrow, &rout));
1154   PetscCall(ISRestoreIndices(iscol, &cout));
1155   PetscCall(VecRestoreArrayRead(bb, &b));
1156   PetscCall(VecRestoreArray(xx, &x));
1157   PetscCall(PetscLogFlops(2.0 * a->nz));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)1161 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1162 {
1163   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1164   IS                 iscol = a->col, isrow = a->row;
1165   PetscInt           i, n                  = A->rmap->n, j;
1166   PetscInt           nz;
1167   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1168   PetscScalar       *x, *tmp, sum;
1169   const PetscScalar *b;
1170   const MatScalar   *aa, *v;
1171 
1172   PetscFunctionBegin;
1173   if (yy != xx) PetscCall(VecCopy(yy, xx));
1174 
1175   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1176   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1177   PetscCall(VecGetArrayRead(bb, &b));
1178   PetscCall(VecGetArray(xx, &x));
1179   tmp = a->solve_work;
1180 
1181   PetscCall(ISGetIndices(isrow, &rout));
1182   r = rout;
1183   PetscCall(ISGetIndices(iscol, &cout));
1184   c = cout;
1185 
1186   /* forward solve the lower triangular */
1187   tmp[0] = b[r[0]];
1188   v      = aa;
1189   vi     = aj;
1190   for (i = 1; i < n; i++) {
1191     nz  = ai[i + 1] - ai[i];
1192     sum = b[r[i]];
1193     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1194     tmp[i] = sum;
1195     v += nz;
1196     vi += nz;
1197   }
1198 
1199   /* backward solve the upper triangular */
1200   v  = aa + adiag[n - 1];
1201   vi = aj + adiag[n - 1];
1202   for (i = n - 1; i >= 0; i--) {
1203     nz  = adiag[i] - adiag[i + 1] - 1;
1204     sum = tmp[i];
1205     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1206     tmp[i] = sum * v[nz];
1207     x[c[i]] += tmp[i];
1208     v += nz + 1;
1209     vi += nz + 1;
1210   }
1211 
1212   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1213   PetscCall(ISRestoreIndices(isrow, &rout));
1214   PetscCall(ISRestoreIndices(iscol, &cout));
1215   PetscCall(VecRestoreArrayRead(bb, &b));
1216   PetscCall(VecRestoreArray(xx, &x));
1217   PetscCall(PetscLogFlops(2.0 * a->nz));
1218   PetscFunctionReturn(PETSC_SUCCESS);
1219 }
1220 
MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)1221 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1222 {
1223   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1224   IS                 iscol = a->col, isrow = a->row;
1225   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1226   PetscInt           i, n = A->rmap->n, j;
1227   PetscInt           nz;
1228   PetscScalar       *x, *tmp, s1;
1229   const MatScalar   *aa, *v;
1230   const PetscScalar *b;
1231 
1232   PetscFunctionBegin;
1233   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1234   PetscCall(VecGetArrayRead(bb, &b));
1235   PetscCall(VecGetArrayWrite(xx, &x));
1236   tmp = a->solve_work;
1237 
1238   PetscCall(ISGetIndices(isrow, &rout));
1239   r = rout;
1240   PetscCall(ISGetIndices(iscol, &cout));
1241   c = cout;
1242 
1243   /* copy the b into temp work space according to permutation */
1244   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1245 
1246   /* forward solve the U^T */
1247   for (i = 0; i < n; i++) {
1248     v  = aa + diag[i];
1249     vi = aj + diag[i] + 1;
1250     nz = ai[i + 1] - diag[i] - 1;
1251     s1 = tmp[i];
1252     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1253     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1254     tmp[i] = s1;
1255   }
1256 
1257   /* backward solve the L^T */
1258   for (i = n - 1; i >= 0; i--) {
1259     v  = aa + diag[i] - 1;
1260     vi = aj + diag[i] - 1;
1261     nz = diag[i] - ai[i];
1262     s1 = tmp[i];
1263     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1264   }
1265 
1266   /* copy tmp into x according to permutation */
1267   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1268 
1269   PetscCall(ISRestoreIndices(isrow, &rout));
1270   PetscCall(ISRestoreIndices(iscol, &cout));
1271   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1272   PetscCall(VecRestoreArrayRead(bb, &b));
1273   PetscCall(VecRestoreArrayWrite(xx, &x));
1274 
1275   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)1279 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1280 {
1281   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1282   IS                 iscol = a->col, isrow = a->row;
1283   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1284   PetscInt           i, n = A->rmap->n, j;
1285   PetscInt           nz;
1286   PetscScalar       *x, *tmp, s1;
1287   const MatScalar   *aa, *v;
1288   const PetscScalar *b;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1292   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1293   PetscCall(VecGetArrayRead(bb, &b));
1294   PetscCall(VecGetArrayWrite(xx, &x));
1295   tmp = a->solve_work;
1296 
1297   PetscCall(ISGetIndices(isrow, &rout));
1298   r = rout;
1299   PetscCall(ISGetIndices(iscol, &cout));
1300   c = cout;
1301 
1302   /* copy the b into temp work space according to permutation */
1303   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1304 
1305   /* forward solve the U^T */
1306   for (i = 0; i < n; i++) {
1307     v  = aa + adiag[i + 1] + 1;
1308     vi = aj + adiag[i + 1] + 1;
1309     nz = adiag[i] - adiag[i + 1] - 1;
1310     s1 = tmp[i];
1311     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1312     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1313     tmp[i] = s1;
1314   }
1315 
1316   /* backward solve the L^T */
1317   for (i = n - 1; i >= 0; i--) {
1318     v  = aa + ai[i];
1319     vi = aj + ai[i];
1320     nz = ai[i + 1] - ai[i];
1321     s1 = tmp[i];
1322     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1323   }
1324 
1325   /* copy tmp into x according to permutation */
1326   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1327 
1328   PetscCall(ISRestoreIndices(isrow, &rout));
1329   PetscCall(ISRestoreIndices(iscol, &cout));
1330   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1331   PetscCall(VecRestoreArrayRead(bb, &b));
1332   PetscCall(VecRestoreArrayWrite(xx, &x));
1333 
1334   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1335   PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337 
MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)1338 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1339 {
1340   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1341   IS                 iscol = a->col, isrow = a->row;
1342   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1343   PetscInt           i, n = A->rmap->n, j;
1344   PetscInt           nz;
1345   PetscScalar       *x, *tmp, s1;
1346   const MatScalar   *aa, *v;
1347   const PetscScalar *b;
1348 
1349   PetscFunctionBegin;
1350   if (zz != xx) PetscCall(VecCopy(zz, xx));
1351   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1352   PetscCall(VecGetArrayRead(bb, &b));
1353   PetscCall(VecGetArray(xx, &x));
1354   tmp = a->solve_work;
1355 
1356   PetscCall(ISGetIndices(isrow, &rout));
1357   r = rout;
1358   PetscCall(ISGetIndices(iscol, &cout));
1359   c = cout;
1360 
1361   /* copy the b into temp work space according to permutation */
1362   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1363 
1364   /* forward solve the U^T */
1365   for (i = 0; i < n; i++) {
1366     v  = aa + diag[i];
1367     vi = aj + diag[i] + 1;
1368     nz = ai[i + 1] - diag[i] - 1;
1369     s1 = tmp[i];
1370     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1371     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1372     tmp[i] = s1;
1373   }
1374 
1375   /* backward solve the L^T */
1376   for (i = n - 1; i >= 0; i--) {
1377     v  = aa + diag[i] - 1;
1378     vi = aj + diag[i] - 1;
1379     nz = diag[i] - ai[i];
1380     s1 = tmp[i];
1381     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1382   }
1383 
1384   /* copy tmp into x according to permutation */
1385   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1386 
1387   PetscCall(ISRestoreIndices(isrow, &rout));
1388   PetscCall(ISRestoreIndices(iscol, &cout));
1389   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1390   PetscCall(VecRestoreArrayRead(bb, &b));
1391   PetscCall(VecRestoreArray(xx, &x));
1392 
1393   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1394   PetscFunctionReturn(PETSC_SUCCESS);
1395 }
1396 
MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)1397 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1398 {
1399   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1400   IS                 iscol = a->col, isrow = a->row;
1401   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1402   PetscInt           i, n = A->rmap->n, j;
1403   PetscInt           nz;
1404   PetscScalar       *x, *tmp, s1;
1405   const MatScalar   *aa, *v;
1406   const PetscScalar *b;
1407 
1408   PetscFunctionBegin;
1409   if (zz != xx) PetscCall(VecCopy(zz, xx));
1410   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1411   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1412   PetscCall(VecGetArrayRead(bb, &b));
1413   PetscCall(VecGetArray(xx, &x));
1414   tmp = a->solve_work;
1415 
1416   PetscCall(ISGetIndices(isrow, &rout));
1417   r = rout;
1418   PetscCall(ISGetIndices(iscol, &cout));
1419   c = cout;
1420 
1421   /* copy the b into temp work space according to permutation */
1422   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1423 
1424   /* forward solve the U^T */
1425   for (i = 0; i < n; i++) {
1426     v  = aa + adiag[i + 1] + 1;
1427     vi = aj + adiag[i + 1] + 1;
1428     nz = adiag[i] - adiag[i + 1] - 1;
1429     s1 = tmp[i];
1430     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1431     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1432     tmp[i] = s1;
1433   }
1434 
1435   /* backward solve the L^T */
1436   for (i = n - 1; i >= 0; i--) {
1437     v  = aa + ai[i];
1438     vi = aj + ai[i];
1439     nz = ai[i + 1] - ai[i];
1440     s1 = tmp[i];
1441     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442   }
1443 
1444   /* copy tmp into x according to permutation */
1445   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1446 
1447   PetscCall(ISRestoreIndices(isrow, &rout));
1448   PetscCall(ISRestoreIndices(iscol, &cout));
1449   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1450   PetscCall(VecRestoreArrayRead(bb, &b));
1451   PetscCall(VecRestoreArray(xx, &x));
1452 
1453   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1454   PetscFunctionReturn(PETSC_SUCCESS);
1455 }
1456 
1457 /*
1458    ilu() under revised new data structure.
1459    Factored arrays bj and ba are stored as
1460      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1461 
1462    bi=fact->i is an array of size n+1, in which
1463      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1464      bi[n]:  points to L(n-1,n-1)+1
1465 
1466   bdiag=fact->diag is an array of size n+1,in which
1467      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1468      bdiag[n]: points to entry of U(n-1,0)-1
1469 
1470    U(i,:) contains bdiag[i] as its last entry, i.e.,
1471     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1472 */
MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)1473 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1474 {
1475   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1476   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1477   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1478   IS             isicol;
1479 
1480   PetscFunctionBegin;
1481   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1482   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1483   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1484   b = (Mat_SeqAIJ *)fact->data;
1485 
1486   /* allocate matrix arrays for new data structure */
1487   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1488   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1489   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1490   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1491   b->free_a  = PETSC_TRUE;
1492   b->free_ij = PETSC_TRUE;
1493 
1494   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1495   bdiag = b->diag;
1496 
1497   /* set bi and bj with new data structure */
1498   bi = b->i;
1499   bj = b->j;
1500 
1501   /* L part */
1502   bi[0] = 0;
1503   for (i = 0; i < n; i++) {
1504     nz        = adiag[i] - ai[i];
1505     bi[i + 1] = bi[i] + nz;
1506     aj        = a->j + ai[i];
1507     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1508   }
1509 
1510   /* U part */
1511   bdiag[n] = bi[n] - 1;
1512   for (i = n - 1; i >= 0; i--) {
1513     nz = ai[i + 1] - adiag[i] - 1;
1514     aj = a->j + adiag[i] + 1;
1515     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1516     /* diag[i] */
1517     bj[k++]  = i;
1518     bdiag[i] = bdiag[i + 1] + nz + 1;
1519   }
1520 
1521   fact->factortype             = MAT_FACTOR_ILU;
1522   fact->info.factor_mallocs    = 0;
1523   fact->info.fill_ratio_given  = info->fill;
1524   fact->info.fill_ratio_needed = 1.0;
1525   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1526   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1527 
1528   b       = (Mat_SeqAIJ *)fact->data;
1529   b->row  = isrow;
1530   b->col  = iscol;
1531   b->icol = isicol;
1532   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1533   PetscCall(PetscObjectReference((PetscObject)isrow));
1534   PetscCall(PetscObjectReference((PetscObject)iscol));
1535   PetscFunctionReturn(PETSC_SUCCESS);
1536 }
1537 
MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)1538 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1539 {
1540   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1541   IS                 isicol;
1542   const PetscInt    *r, *ic;
1543   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1544   PetscInt          *bi, *cols, nnz, *cols_lvl;
1545   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1546   PetscInt           i, levels, diagonal_fill;
1547   PetscBool          col_identity, row_identity;
1548   PetscReal          f;
1549   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1550   PetscBT            lnkbt;
1551   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1552   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1553   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1554   PetscBool          diagDense;
1555 
1556   PetscFunctionBegin;
1557   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1558   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1559   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
1560 
1561   levels = (PetscInt)info->levels;
1562   PetscCall(ISIdentity(isrow, &row_identity));
1563   PetscCall(ISIdentity(iscol, &col_identity));
1564   if (!levels && row_identity && col_identity) {
1565     /* special case: ilu(0) with natural ordering */
1566     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1567     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1568     PetscFunctionReturn(PETSC_SUCCESS);
1569   }
1570 
1571   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1572   PetscCall(ISGetIndices(isrow, &r));
1573   PetscCall(ISGetIndices(isicol, &ic));
1574 
1575   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1576   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1577   PetscCall(PetscMalloc1(n + 1, &bdiag));
1578   bi[0] = bdiag[0] = 0;
1579   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1580 
1581   /* create a linked list for storing column indices of the active row */
1582   nlnk = n + 1;
1583   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1584 
1585   /* initial FreeSpace size is f*(ai[n]+1) */
1586   f             = info->fill;
1587   diagonal_fill = (PetscInt)info->diagonal_fill;
1588   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1589   current_space = free_space;
1590   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1591   current_space_lvl = free_space_lvl;
1592   for (i = 0; i < n; i++) {
1593     nzi = 0;
1594     /* copy current row into linked list */
1595     nnz = ai[r[i] + 1] - ai[r[i]];
1596     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1597     cols   = aj + ai[r[i]];
1598     lnk[i] = -1; /* marker to indicate if diagonal exists */
1599     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1600     nzi += nlnk;
1601 
1602     /* make sure diagonal entry is included */
1603     if (diagonal_fill && lnk[i] == -1) {
1604       fm = n;
1605       while (lnk[fm] < i) fm = lnk[fm];
1606       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1607       lnk[fm]    = i;
1608       lnk_lvl[i] = 0;
1609       nzi++;
1610       dcount++;
1611     }
1612 
1613     /* add pivot rows into the active row */
1614     nzbd = 0;
1615     prow = lnk[n];
1616     while (prow < i) {
1617       nnz      = bdiag[prow];
1618       cols     = bj_ptr[prow] + nnz + 1;
1619       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1620       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1621       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1622       nzi += nlnk;
1623       prow = lnk[prow];
1624       nzbd++;
1625     }
1626     bdiag[i]  = nzbd;
1627     bi[i + 1] = bi[i] + nzi;
1628     /* if free space is not available, make more free space */
1629     if (current_space->local_remaining < nzi) {
1630       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1631       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1632       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1633       reallocs++;
1634     }
1635 
1636     /* copy data into free_space and free_space_lvl, then initialize lnk */
1637     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1638     bj_ptr[i]    = current_space->array;
1639     bjlvl_ptr[i] = current_space_lvl->array;
1640 
1641     /* make sure the active row i has diagonal entry */
1642     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1643 
1644     current_space->array += nzi;
1645     current_space->local_used += nzi;
1646     current_space->local_remaining -= nzi;
1647     current_space_lvl->array += nzi;
1648     current_space_lvl->local_used += nzi;
1649     current_space_lvl->local_remaining -= nzi;
1650   }
1651 
1652   PetscCall(ISRestoreIndices(isrow, &r));
1653   PetscCall(ISRestoreIndices(isicol, &ic));
1654   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1655   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1656   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1657 
1658   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1659   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1660   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1661 
1662 #if defined(PETSC_USE_INFO)
1663   {
1664     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1665     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1666     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1667     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1668     PetscCall(PetscInfo(A, "for best performance.\n"));
1669     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1670   }
1671 #endif
1672   /* put together the new matrix */
1673   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1674   b          = (Mat_SeqAIJ *)fact->data;
1675   b->free_ij = PETSC_TRUE;
1676   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1677   b->free_a = PETSC_TRUE;
1678   b->j      = bj;
1679   b->i      = bi;
1680   b->diag   = bdiag;
1681   b->ilen   = NULL;
1682   b->imax   = NULL;
1683   b->row    = isrow;
1684   b->col    = iscol;
1685   PetscCall(PetscObjectReference((PetscObject)isrow));
1686   PetscCall(PetscObjectReference((PetscObject)iscol));
1687   b->icol = isicol;
1688 
1689   PetscCall(PetscMalloc1(n, &b->solve_work));
1690   /* In b structure:  Free imax, ilen, old a, old j.
1691      Allocate bdiag, solve_work, new a, new j */
1692   b->maxnz = b->nz = bdiag[0] + 1;
1693 
1694   fact->info.factor_mallocs    = reallocs;
1695   fact->info.fill_ratio_given  = f;
1696   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1697   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1698   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1699   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo * info)1703 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1704 {
1705   Mat              C  = B;
1706   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1707   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1708   IS               ip = b->row, iip = b->icol;
1709   const PetscInt  *rip, *riip;
1710   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1711   PetscInt        *ai = a->i, *aj = a->j;
1712   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1713   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1714   PetscBool        perm_identity;
1715   FactorShiftCtx   sctx;
1716   PetscReal        rs;
1717   const MatScalar *aa, *v;
1718   MatScalar        d;
1719   const PetscInt  *adiag;
1720 
1721   PetscFunctionBegin;
1722   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1723   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1724   /* MatPivotSetUp(): initialize shift context sctx */
1725   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1726 
1727   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1728     sctx.shift_top = info->zeropivot;
1729     for (i = 0; i < mbs; i++) {
1730       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1731       d  = aa[adiag[i]];
1732       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1733       v  = aa + ai[i];
1734       nz = ai[i + 1] - ai[i];
1735       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1736       if (rs > sctx.shift_top) sctx.shift_top = rs;
1737     }
1738     sctx.shift_top *= 1.1;
1739     sctx.nshift_max = 5;
1740     sctx.shift_lo   = 0.;
1741     sctx.shift_hi   = 1.;
1742   }
1743 
1744   PetscCall(ISGetIndices(ip, &rip));
1745   PetscCall(ISGetIndices(iip, &riip));
1746 
1747   /* allocate working arrays
1748      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1749      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1750   */
1751   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1752 
1753   do {
1754     sctx.newshift = PETSC_FALSE;
1755 
1756     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1757     if (mbs) il[0] = 0;
1758 
1759     for (k = 0; k < mbs; k++) {
1760       /* zero rtmp */
1761       nz    = bi[k + 1] - bi[k];
1762       bjtmp = bj + bi[k];
1763       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1764 
1765       /* load in initial unfactored row */
1766       bval = ba + bi[k];
1767       jmin = ai[rip[k]];
1768       jmax = ai[rip[k] + 1];
1769       for (j = jmin; j < jmax; j++) {
1770         col = riip[aj[j]];
1771         if (col >= k) { /* only take upper triangular entry */
1772           rtmp[col] = aa[j];
1773           *bval++   = 0.0; /* for in-place factorization */
1774         }
1775       }
1776       /* shift the diagonal of the matrix: ZeropivotApply() */
1777       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1778 
1779       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1780       dk = rtmp[k];
1781       i  = c2r[k]; /* first row to be added to k_th row  */
1782 
1783       while (i < k) {
1784         nexti = c2r[i]; /* next row to be added to k_th row */
1785 
1786         /* compute multiplier, update diag(k) and U(i,k) */
1787         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1788         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1789         dk += uikdi * ba[ili];           /* update diag[k] */
1790         ba[ili] = uikdi;                 /* -U(i,k) */
1791 
1792         /* add multiple of row i to k-th row */
1793         jmin = ili + 1;
1794         jmax = bi[i + 1];
1795         if (jmin < jmax) {
1796           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1797           /* update il and c2r for row i */
1798           il[i]  = jmin;
1799           j      = bj[jmin];
1800           c2r[i] = c2r[j];
1801           c2r[j] = i;
1802         }
1803         i = nexti;
1804       }
1805 
1806       /* copy data into U(k,:) */
1807       rs   = 0.0;
1808       jmin = bi[k];
1809       jmax = bi[k + 1] - 1;
1810       if (jmin < jmax) {
1811         for (j = jmin; j < jmax; j++) {
1812           col   = bj[j];
1813           ba[j] = rtmp[col];
1814           rs += PetscAbsScalar(ba[j]);
1815         }
1816         /* add the k-th row into il and c2r */
1817         il[k]  = jmin;
1818         i      = bj[jmin];
1819         c2r[k] = c2r[i];
1820         c2r[i] = k;
1821       }
1822 
1823       /* MatPivotCheck() */
1824       sctx.rs = rs;
1825       sctx.pv = dk;
1826       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
1827       if (sctx.newshift) break;
1828       dk = sctx.pv;
1829 
1830       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1831     }
1832   } while (sctx.newshift);
1833 
1834   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1835   PetscCall(PetscFree3(rtmp, il, c2r));
1836   PetscCall(ISRestoreIndices(ip, &rip));
1837   PetscCall(ISRestoreIndices(iip, &riip));
1838 
1839   PetscCall(ISIdentity(ip, &perm_identity));
1840   if (perm_identity) {
1841     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1842     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1843     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1844     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1845   } else {
1846     B->ops->solve          = MatSolve_SeqSBAIJ_1;
1847     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1848     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
1849     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
1850   }
1851 
1852   C->assembled    = PETSC_TRUE;
1853   C->preallocated = PETSC_TRUE;
1854 
1855   PetscCall(PetscLogFlops(C->rmap->n));
1856 
1857   /* MatPivotView() */
1858   if (sctx.nshift) {
1859     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1860       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1861     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1862       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1863     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1864       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1865     }
1866   }
1867   PetscFunctionReturn(PETSC_SUCCESS);
1868 }
1869 
MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo * info)1870 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1871 {
1872   Mat              C  = B;
1873   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1874   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1875   IS               ip = b->row, iip = b->icol;
1876   const PetscInt  *rip, *riip;
1877   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
1878   PetscInt        *ai = a->i, *aj = a->j;
1879   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1880   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1881   const MatScalar *aa, *v;
1882   PetscBool        perm_identity;
1883   FactorShiftCtx   sctx;
1884   PetscReal        rs;
1885   MatScalar        d;
1886   const PetscInt  *adiag;
1887 
1888   PetscFunctionBegin;
1889   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1890   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1891   /* MatPivotSetUp(): initialize shift context sctx */
1892   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1893 
1894   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1895     sctx.shift_top = info->zeropivot;
1896     for (i = 0; i < mbs; i++) {
1897       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1898       d  = aa[adiag[i]];
1899       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1900       v  = aa + ai[i];
1901       nz = ai[i + 1] - ai[i];
1902       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1903       if (rs > sctx.shift_top) sctx.shift_top = rs;
1904     }
1905     sctx.shift_top *= 1.1;
1906     sctx.nshift_max = 5;
1907     sctx.shift_lo   = 0.;
1908     sctx.shift_hi   = 1.;
1909   }
1910 
1911   PetscCall(ISGetIndices(ip, &rip));
1912   PetscCall(ISGetIndices(iip, &riip));
1913 
1914   /* initialization */
1915   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1916 
1917   do {
1918     sctx.newshift = PETSC_FALSE;
1919 
1920     for (i = 0; i < mbs; i++) jl[i] = mbs;
1921     il[0] = 0;
1922 
1923     for (k = 0; k < mbs; k++) {
1924       /* zero rtmp */
1925       nz    = bi[k + 1] - bi[k];
1926       bjtmp = bj + bi[k];
1927       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1928 
1929       bval = ba + bi[k];
1930       /* initialize k-th row by the perm[k]-th row of A */
1931       jmin = ai[rip[k]];
1932       jmax = ai[rip[k] + 1];
1933       for (j = jmin; j < jmax; j++) {
1934         col = riip[aj[j]];
1935         if (col >= k) { /* only take upper triangular entry */
1936           rtmp[col] = aa[j];
1937           *bval++   = 0.0; /* for in-place factorization */
1938         }
1939       }
1940       /* shift the diagonal of the matrix */
1941       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1942 
1943       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1944       dk = rtmp[k];
1945       i  = jl[k]; /* first row to be added to k_th row  */
1946 
1947       while (i < k) {
1948         nexti = jl[i]; /* next row to be added to k_th row */
1949 
1950         /* compute multiplier, update diag(k) and U(i,k) */
1951         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1952         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1953         dk += uikdi * ba[ili];
1954         ba[ili] = uikdi; /* -U(i,k) */
1955 
1956         /* add multiple of row i to k-th row */
1957         jmin = ili + 1;
1958         jmax = bi[i + 1];
1959         if (jmin < jmax) {
1960           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1961           /* update il and jl for row i */
1962           il[i] = jmin;
1963           j     = bj[jmin];
1964           jl[i] = jl[j];
1965           jl[j] = i;
1966         }
1967         i = nexti;
1968       }
1969 
1970       /* shift the diagonals when zero pivot is detected */
1971       /* compute rs=sum of abs(off-diagonal) */
1972       rs   = 0.0;
1973       jmin = bi[k] + 1;
1974       nz   = bi[k + 1] - jmin;
1975       bcol = bj + jmin;
1976       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
1977 
1978       sctx.rs = rs;
1979       sctx.pv = dk;
1980       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1981       if (sctx.newshift) break;
1982       dk = sctx.pv;
1983 
1984       /* copy data into U(k,:) */
1985       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1986       jmin      = bi[k] + 1;
1987       jmax      = bi[k + 1];
1988       if (jmin < jmax) {
1989         for (j = jmin; j < jmax; j++) {
1990           col   = bj[j];
1991           ba[j] = rtmp[col];
1992         }
1993         /* add the k-th row into il and jl */
1994         il[k] = jmin;
1995         i     = bj[jmin];
1996         jl[k] = jl[i];
1997         jl[i] = k;
1998       }
1999     }
2000   } while (sctx.newshift);
2001 
2002   PetscCall(PetscFree3(rtmp, il, jl));
2003   PetscCall(ISRestoreIndices(ip, &rip));
2004   PetscCall(ISRestoreIndices(iip, &riip));
2005   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2006 
2007   PetscCall(ISIdentity(ip, &perm_identity));
2008   if (perm_identity) {
2009     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2010     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2011     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2012     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013   } else {
2014     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2015     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2016     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2017     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2018   }
2019 
2020   C->assembled    = PETSC_TRUE;
2021   C->preallocated = PETSC_TRUE;
2022 
2023   PetscCall(PetscLogFlops(C->rmap->n));
2024   if (sctx.nshift) {
2025     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2026       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2027     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2028       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029     }
2030   }
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 /*
2035    icc() under revised new data structure.
2036    Factored arrays bj and ba are stored as
2037      U(0,:),...,U(i,:),U(n-1,:)
2038 
2039    ui=fact->i is an array of size n+1, in which
2040    ui+
2041      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2042      ui[n]:  points to U(n-1,n-1)+1
2043 
2044   udiag=fact->diag is an array of size n,in which
2045      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2046 
2047    U(i,:) contains udiag[i] as its last entry, i.e.,
2048     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2049 */
2050 
MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2051 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2052 {
2053   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2054   Mat_SeqSBAIJ      *b;
2055   PetscBool          perm_identity;
2056   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2057   const PetscInt    *rip, *riip, *adiag;
2058   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2059   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
2060   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2061   PetscReal          fill = info->fill, levels = info->levels;
2062   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2063   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2064   PetscBT            lnkbt;
2065   IS                 iperm;
2066   PetscBool          diagDense;
2067 
2068   PetscFunctionBegin;
2069   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2070   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2071   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2072   PetscCall(ISIdentity(perm, &perm_identity));
2073   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2074 
2075   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2076   PetscCall(PetscMalloc1(am + 1, &udiag));
2077   ui[0] = 0;
2078 
2079   /* ICC(0) without matrix ordering: simply rearrange column indices */
2080   if (!levels && perm_identity) {
2081     for (i = 0; i < am; i++) {
2082       ncols     = ai[i + 1] - adiag[i];
2083       ui[i + 1] = ui[i] + ncols;
2084       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2085     }
2086     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2087     cols = uj;
2088     for (i = 0; i < am; i++) {
2089       aj    = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2090       ncols = ai[i + 1] - adiag[i] - 1;
2091       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2092       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2093     }
2094   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2095     PetscCall(ISGetIndices(iperm, &riip));
2096     PetscCall(ISGetIndices(perm, &rip));
2097 
2098     /* initialization */
2099     PetscCall(PetscMalloc1(am + 1, &ajtmp));
2100 
2101     /* jl: linked list for storing indices of the pivot rows
2102        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2103     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2104     for (i = 0; i < am; i++) {
2105       jl[i] = am;
2106       il[i] = 0;
2107     }
2108 
2109     /* create and initialize a linked list for storing column indices of the active row k */
2110     nlnk = am + 1;
2111     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2112 
2113     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2114     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2115     current_space = free_space;
2116     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2117     current_space_lvl = free_space_lvl;
2118 
2119     for (k = 0; k < am; k++) { /* for each active row k */
2120       /* initialize lnk by the column indices of row rip[k] of A */
2121       nzk   = 0;
2122       ncols = ai[rip[k] + 1] - ai[rip[k]];
2123       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2124       ncols_upper = 0;
2125       for (j = 0; j < ncols; j++) {
2126         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2127         if (riip[i] >= k) {         /* only take upper triangular entry */
2128           ajtmp[ncols_upper] = i;
2129           ncols_upper++;
2130         }
2131       }
2132       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2133       nzk += nlnk;
2134 
2135       /* update lnk by computing fill-in for each pivot row to be merged in */
2136       prow = jl[k]; /* 1st pivot row */
2137 
2138       while (prow < k) {
2139         nextprow = jl[prow];
2140 
2141         /* merge prow into k-th row */
2142         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2143         jmax  = ui[prow + 1];
2144         ncols = jmax - jmin;
2145         i     = jmin - ui[prow];
2146         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2147         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2148         j     = *(uj - 1);
2149         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2150         nzk += nlnk;
2151 
2152         /* update il and jl for prow */
2153         if (jmin < jmax) {
2154           il[prow] = jmin;
2155           j        = *cols;
2156           jl[prow] = jl[j];
2157           jl[j]    = prow;
2158         }
2159         prow = nextprow;
2160       }
2161 
2162       /* if free space is not available, make more free space */
2163       if (current_space->local_remaining < nzk) {
2164         i = am - k + 1;                                    /* num of unfactored rows */
2165         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2166         PetscCall(PetscFreeSpaceGet(i, &current_space));
2167         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2168         reallocs++;
2169       }
2170 
2171       /* copy data into free_space and free_space_lvl, then initialize lnk */
2172       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2173       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2174 
2175       /* add the k-th row into il and jl */
2176       if (nzk > 1) {
2177         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2178         jl[k] = jl[i];
2179         jl[i] = k;
2180         il[k] = ui[k] + 1;
2181       }
2182       uj_ptr[k]     = current_space->array;
2183       uj_lvl_ptr[k] = current_space_lvl->array;
2184 
2185       current_space->array += nzk;
2186       current_space->local_used += nzk;
2187       current_space->local_remaining -= nzk;
2188 
2189       current_space_lvl->array += nzk;
2190       current_space_lvl->local_used += nzk;
2191       current_space_lvl->local_remaining -= nzk;
2192 
2193       ui[k + 1] = ui[k] + nzk;
2194     }
2195 
2196     PetscCall(ISRestoreIndices(perm, &rip));
2197     PetscCall(ISRestoreIndices(iperm, &riip));
2198     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2199     PetscCall(PetscFree(ajtmp));
2200 
2201     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2202     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2203     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2204     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2205     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2206 
2207   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2208 
2209   /* put together the new matrix in MATSEQSBAIJ format */
2210   b          = (Mat_SeqSBAIJ *)fact->data;
2211   b->free_ij = PETSC_TRUE;
2212   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2213   b->free_a = PETSC_TRUE;
2214   b->j      = uj;
2215   b->i      = ui;
2216   b->diag   = udiag;
2217   b->ilen   = NULL;
2218   b->imax   = NULL;
2219   b->row    = perm;
2220   b->col    = perm;
2221   PetscCall(PetscObjectReference((PetscObject)perm));
2222   PetscCall(PetscObjectReference((PetscObject)perm));
2223   b->icol          = iperm;
2224   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2225 
2226   PetscCall(PetscMalloc1(am, &b->solve_work));
2227 
2228   b->maxnz = b->nz = ui[am];
2229 
2230   fact->info.factor_mallocs   = reallocs;
2231   fact->info.fill_ratio_given = fill;
2232   if (ai[am] != 0) {
2233     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2234     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2235   } else {
2236     fact->info.fill_ratio_needed = 0.0;
2237   }
2238 #if defined(PETSC_USE_INFO)
2239   if (ai[am] != 0) {
2240     PetscReal af = fact->info.fill_ratio_needed;
2241     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2242     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2243     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2244   } else {
2245     PetscCall(PetscInfo(A, "Empty matrix\n"));
2246   }
2247 #endif
2248   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2249   PetscFunctionReturn(PETSC_SUCCESS);
2250 }
2251 
MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2252 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2253 {
2254   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2255   Mat_SeqSBAIJ      *b;
2256   PetscBool          perm_identity;
2257   PetscReal          fill = info->fill;
2258   const PetscInt    *rip, *riip;
2259   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2260   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2261   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2262   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2263   PetscBT            lnkbt;
2264   IS                 iperm;
2265   PetscBool          diagDense;
2266 
2267   PetscFunctionBegin;
2268   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2269   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2270   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2271 
2272   /* check whether perm is the identity mapping */
2273   PetscCall(ISIdentity(perm, &perm_identity));
2274   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2275   PetscCall(ISGetIndices(iperm, &riip));
2276   PetscCall(ISGetIndices(perm, &rip));
2277 
2278   /* initialization */
2279   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2280   PetscCall(PetscMalloc1(am + 1, &udiag));
2281   ui[0] = 0;
2282 
2283   /* jl: linked list for storing indices of the pivot rows
2284      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2285   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2286   for (i = 0; i < am; i++) {
2287     jl[i] = am;
2288     il[i] = 0;
2289   }
2290 
2291   /* create and initialize a linked list for storing column indices of the active row k */
2292   nlnk = am + 1;
2293   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2294 
2295   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2296   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2297   current_space = free_space;
2298 
2299   for (k = 0; k < am; k++) { /* for each active row k */
2300     /* initialize lnk by the column indices of row rip[k] of A */
2301     nzk   = 0;
2302     ncols = ai[rip[k] + 1] - ai[rip[k]];
2303     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2304     ncols_upper = 0;
2305     for (j = 0; j < ncols; j++) {
2306       i = riip[*(aj + ai[rip[k]] + j)];
2307       if (i >= k) { /* only take upper triangular entry */
2308         cols[ncols_upper] = i;
2309         ncols_upper++;
2310       }
2311     }
2312     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2313     nzk += nlnk;
2314 
2315     /* update lnk by computing fill-in for each pivot row to be merged in */
2316     prow = jl[k]; /* 1st pivot row */
2317 
2318     while (prow < k) {
2319       nextprow = jl[prow];
2320       /* merge prow into k-th row */
2321       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2322       jmax   = ui[prow + 1];
2323       ncols  = jmax - jmin;
2324       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2325       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2326       nzk += nlnk;
2327 
2328       /* update il and jl for prow */
2329       if (jmin < jmax) {
2330         il[prow] = jmin;
2331         j        = *uj_ptr;
2332         jl[prow] = jl[j];
2333         jl[j]    = prow;
2334       }
2335       prow = nextprow;
2336     }
2337 
2338     /* if free space is not available, make more free space */
2339     if (current_space->local_remaining < nzk) {
2340       i = am - k + 1;                                    /* num of unfactored rows */
2341       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2342       PetscCall(PetscFreeSpaceGet(i, &current_space));
2343       reallocs++;
2344     }
2345 
2346     /* copy data into free space, then initialize lnk */
2347     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2348 
2349     /* add the k-th row into il and jl */
2350     if (nzk > 1) {
2351       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2352       jl[k] = jl[i];
2353       jl[i] = k;
2354       il[k] = ui[k] + 1;
2355     }
2356     ui_ptr[k] = current_space->array;
2357 
2358     current_space->array += nzk;
2359     current_space->local_used += nzk;
2360     current_space->local_remaining -= nzk;
2361 
2362     ui[k + 1] = ui[k] + nzk;
2363   }
2364 
2365   PetscCall(ISRestoreIndices(perm, &rip));
2366   PetscCall(ISRestoreIndices(iperm, &riip));
2367   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2368 
2369   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2370   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2371   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2372   PetscCall(PetscLLDestroy(lnk, lnkbt));
2373 
2374   /* put together the new matrix in MATSEQSBAIJ format */
2375   b          = (Mat_SeqSBAIJ *)fact->data;
2376   b->free_ij = PETSC_TRUE;
2377   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2378   b->free_a = PETSC_TRUE;
2379   b->j      = uj;
2380   b->i      = ui;
2381   b->diag   = udiag;
2382   b->ilen   = NULL;
2383   b->imax   = NULL;
2384   b->row    = perm;
2385   b->col    = perm;
2386 
2387   PetscCall(PetscObjectReference((PetscObject)perm));
2388   PetscCall(PetscObjectReference((PetscObject)perm));
2389 
2390   b->icol          = iperm;
2391   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2392 
2393   PetscCall(PetscMalloc1(am, &b->solve_work));
2394 
2395   b->maxnz = b->nz = ui[am];
2396 
2397   fact->info.factor_mallocs   = reallocs;
2398   fact->info.fill_ratio_given = fill;
2399   if (ai[am] != 0) {
2400     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2401     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2402   } else {
2403     fact->info.fill_ratio_needed = 0.0;
2404   }
2405 #if defined(PETSC_USE_INFO)
2406   if (ai[am] != 0) {
2407     PetscReal af = fact->info.fill_ratio_needed;
2408     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2409     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2410     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2411   } else {
2412     PetscCall(PetscInfo(A, "Empty matrix\n"));
2413   }
2414 #endif
2415   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2416   PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418 
MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)2419 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2420 {
2421   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
2422   PetscInt           n  = A->rmap->n;
2423   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2424   PetscScalar       *x, sum;
2425   const PetscScalar *b;
2426   const MatScalar   *aa, *v;
2427   PetscInt           i, nz;
2428 
2429   PetscFunctionBegin;
2430   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2431 
2432   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2433   PetscCall(VecGetArrayRead(bb, &b));
2434   PetscCall(VecGetArrayWrite(xx, &x));
2435 
2436   /* forward solve the lower triangular */
2437   x[0] = b[0];
2438   v    = aa;
2439   vi   = aj;
2440   for (i = 1; i < n; i++) {
2441     nz  = ai[i + 1] - ai[i];
2442     sum = b[i];
2443     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2444     v += nz;
2445     vi += nz;
2446     x[i] = sum;
2447   }
2448 
2449   /* backward solve the upper triangular */
2450   for (i = n - 1; i >= 0; i--) {
2451     v   = aa + adiag[i + 1] + 1;
2452     vi  = aj + adiag[i + 1] + 1;
2453     nz  = adiag[i] - adiag[i + 1] - 1;
2454     sum = x[i];
2455     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2456     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2457   }
2458 
2459   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2460   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2461   PetscCall(VecRestoreArrayRead(bb, &b));
2462   PetscCall(VecRestoreArrayWrite(xx, &x));
2463   PetscFunctionReturn(PETSC_SUCCESS);
2464 }
2465 
MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)2466 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2467 {
2468   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
2469   IS                 iscol = a->col, isrow = a->row;
2470   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2471   const PetscInt    *rout, *cout, *r, *c;
2472   PetscScalar       *x, *tmp, sum;
2473   const PetscScalar *b;
2474   const MatScalar   *aa, *v;
2475 
2476   PetscFunctionBegin;
2477   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2478 
2479   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2480   PetscCall(VecGetArrayRead(bb, &b));
2481   PetscCall(VecGetArrayWrite(xx, &x));
2482   tmp = a->solve_work;
2483 
2484   PetscCall(ISGetIndices(isrow, &rout));
2485   r = rout;
2486   PetscCall(ISGetIndices(iscol, &cout));
2487   c = cout;
2488 
2489   /* forward solve the lower triangular */
2490   tmp[0] = b[r[0]];
2491   v      = aa;
2492   vi     = aj;
2493   for (i = 1; i < n; i++) {
2494     nz  = ai[i + 1] - ai[i];
2495     sum = b[r[i]];
2496     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2497     tmp[i] = sum;
2498     v += nz;
2499     vi += nz;
2500   }
2501 
2502   /* backward solve the upper triangular */
2503   for (i = n - 1; i >= 0; i--) {
2504     v   = aa + adiag[i + 1] + 1;
2505     vi  = aj + adiag[i + 1] + 1;
2506     nz  = adiag[i] - adiag[i + 1] - 1;
2507     sum = tmp[i];
2508     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2509     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2510   }
2511 
2512   PetscCall(ISRestoreIndices(isrow, &rout));
2513   PetscCall(ISRestoreIndices(iscol, &cout));
2514   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2515   PetscCall(VecRestoreArrayRead(bb, &b));
2516   PetscCall(VecRestoreArrayWrite(xx, &x));
2517   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2518   PetscFunctionReturn(PETSC_SUCCESS);
2519 }
2520