xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision e8c0849ab8fe171bed529bea27238c9b402db591)
1 /*
2     Factorization code for BAIJ format.
3 */
4 #include <../src/mat/impls/baij/seq/baij.h>
5 #include <petsc/private/kernels/blockinvert.h>
6 
7 /*
8    This is used to set the numeric factorization for both LU and ILU symbolic factorization
9 */
MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)10 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11 {
12   PetscFunctionBegin;
13   if (natural) {
14     switch (fact->rmap->bs) {
15     case 1:
16       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17       break;
18     case 2:
19       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20       break;
21     case 3:
22       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23       break;
24     case 4:
25       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26       break;
27     case 5:
28       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29       break;
30     case 6:
31       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32       break;
33     case 7:
34       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35       break;
36     case 9:
37 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39 #else
40       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41 #endif
42       break;
43     case 15:
44       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45       break;
46     default:
47       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48       break;
49     }
50   } else {
51     switch (fact->rmap->bs) {
52     case 1:
53       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54       break;
55     case 2:
56       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57       break;
58     case 3:
59       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60       break;
61     case 4:
62       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63       break;
64     case 5:
65       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66       break;
67     case 6:
68       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69       break;
70     case 7:
71       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72       break;
73     default:
74       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75       break;
76     }
77   }
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)81 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82 {
83   PetscFunctionBegin;
84   if (natural) {
85     switch (inA->rmap->bs) {
86     case 1:
87       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_1_inplace;
88       break;
89     case 2:
90       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91       break;
92     case 3:
93       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94       break;
95     case 4:
96       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
97       break;
98     case 5:
99       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
100       break;
101     case 6:
102       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
103       break;
104     case 7:
105       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
106       break;
107     default:
108       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_N_inplace;
109       break;
110     }
111   } else {
112     switch (inA->rmap->bs) {
113     case 1:
114       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_1_inplace;
115       break;
116     case 2:
117       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_2_inplace;
118       break;
119     case 3:
120       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_3_inplace;
121       break;
122     case 4:
123       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_4_inplace;
124       break;
125     case 5:
126       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_5_inplace;
127       break;
128     case 6:
129       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_6_inplace;
130       break;
131     case 7:
132       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_7_inplace;
133       break;
134     default:
135       inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_N_inplace;
136       break;
137     }
138   }
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 /*
143     The symbolic factorization code is identical to that for AIJ format,
144   except for very small changes since this is now a SeqBAIJ datastructure.
145   NOT good code reuse.
146 */
147 #include <petscbt.h>
148 #include <../src/mat/utils/freespace.h>
149 
MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)150 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
151 {
152   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
153   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
154   PetscBool          row_identity, col_identity, both_identity;
155   IS                 isicol;
156   const PetscInt    *r, *ic;
157   PetscInt           i, *ai = a->i, *aj = a->j;
158   PetscInt          *bi, *bj, *ajtmp;
159   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
160   PetscReal          f;
161   PetscInt           nlnk, *lnk, k, **bi_ptr;
162   PetscFreeSpaceList free_space = NULL, current_space = NULL;
163   PetscBT            lnkbt;
164   PetscBool          diagDense;
165 
166   PetscFunctionBegin;
167   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
168   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, NULL, &diagDense));
169   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
170 
171   if (bs > 1) { /* check shifttype */
172     PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
173   }
174 
175   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
176   PetscCall(ISGetIndices(isrow, &r));
177   PetscCall(ISGetIndices(isicol, &ic));
178 
179   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
180   PetscCall(PetscMalloc1(n + 1, &bi));
181   PetscCall(PetscMalloc1(n + 1, &bdiag));
182   bi[0] = bdiag[0] = 0;
183 
184   /* linked list for storing column indices of the active row */
185   nlnk = n + 1;
186   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
187 
188   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
189 
190   /* initial FreeSpace size is f*(ai[n]+1) */
191   f = info->fill;
192   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
193 
194   current_space = free_space;
195 
196   for (i = 0; i < n; i++) {
197     /* copy previous fill into linked list */
198     nzi   = 0;
199     nnz   = ai[r[i] + 1] - ai[r[i]];
200     ajtmp = aj + ai[r[i]];
201     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
202     nzi += nlnk;
203 
204     /* add pivot rows into linked list */
205     row = lnk[n];
206     while (row < i) {
207       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
208       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
209       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
210       nzi += nlnk;
211       row = lnk[row];
212     }
213     bi[i + 1] = bi[i] + nzi;
214     im[i]     = nzi;
215 
216     /* mark bdiag */
217     nzbd = 0;
218     nnz  = nzi;
219     k    = lnk[n];
220     while (nnz-- && k < i) {
221       nzbd++;
222       k = lnk[k];
223     }
224     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
225 
226     /* if free space is not available, make more free space */
227     if (current_space->local_remaining < nzi) {
228       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
229       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
230       reallocs++;
231     }
232 
233     /* copy data into free space, then initialize lnk */
234     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
235 
236     bi_ptr[i] = current_space->array;
237     current_space->array += nzi;
238     current_space->local_used += nzi;
239     current_space->local_remaining -= nzi;
240   }
241 
242   PetscCall(ISRestoreIndices(isrow, &r));
243   PetscCall(ISRestoreIndices(isicol, &ic));
244 
245   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
246   PetscCall(PetscMalloc1(bi[n], &bj));
247   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
248   PetscCall(PetscLLDestroy(lnk, lnkbt));
249   PetscCall(PetscFree2(bi_ptr, im));
250 
251   /* put together the new matrix */
252   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
253   b = (Mat_SeqBAIJ *)B->data;
254 
255   b->free_ij = PETSC_TRUE;
256   PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
257   b->free_a        = PETSC_TRUE;
258   b->j             = bj;
259   b->i             = bi;
260   b->diag          = bdiag;
261   b->ilen          = NULL;
262   b->imax          = NULL;
263   b->row           = isrow;
264   b->col           = iscol;
265   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
266 
267   PetscCall(PetscObjectReference((PetscObject)isrow));
268   PetscCall(PetscObjectReference((PetscObject)iscol));
269   b->icol = isicol;
270   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
271 
272   b->maxnz = b->nz = bdiag[0] + 1;
273 
274   B->factortype            = MAT_FACTOR_LU;
275   B->info.factor_mallocs   = reallocs;
276   B->info.fill_ratio_given = f;
277 
278   if (ai[n] != 0) {
279     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
280   } else {
281     B->info.fill_ratio_needed = 0.0;
282   }
283 #if defined(PETSC_USE_INFO)
284   if (ai[n] != 0) {
285     PetscReal af = B->info.fill_ratio_needed;
286     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
287     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
288     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
289     PetscCall(PetscInfo(A, "for best performance.\n"));
290   } else {
291     PetscCall(PetscInfo(A, "Empty matrix\n"));
292   }
293 #endif
294 
295   PetscCall(ISIdentity(isrow, &row_identity));
296   PetscCall(ISIdentity(iscol, &col_identity));
297 
298   both_identity = (PetscBool)(row_identity && col_identity);
299 
300   PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303