1 /*
2 Factorization code for BAIJ format.
3 */
4 #include <../src/mat/impls/baij/seq/baij.h>
5 #include <petsc/private/kernels/blockinvert.h>
6
7 /*
8 This is used to set the numeric factorization for both LU and ILU symbolic factorization
9 */
MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)10 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11 {
12 PetscFunctionBegin;
13 if (natural) {
14 switch (fact->rmap->bs) {
15 case 1:
16 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17 break;
18 case 2:
19 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20 break;
21 case 3:
22 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23 break;
24 case 4:
25 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26 break;
27 case 5:
28 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29 break;
30 case 6:
31 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32 break;
33 case 7:
34 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35 break;
36 case 9:
37 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39 #else
40 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41 #endif
42 break;
43 case 15:
44 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45 break;
46 default:
47 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48 break;
49 }
50 } else {
51 switch (fact->rmap->bs) {
52 case 1:
53 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54 break;
55 case 2:
56 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57 break;
58 case 3:
59 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60 break;
61 case 4:
62 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63 break;
64 case 5:
65 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66 break;
67 case 6:
68 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69 break;
70 case 7:
71 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72 break;
73 default:
74 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75 break;
76 }
77 }
78 PetscFunctionReturn(PETSC_SUCCESS);
79 }
80
MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)81 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82 {
83 PetscFunctionBegin;
84 if (natural) {
85 switch (inA->rmap->bs) {
86 case 1:
87 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_1_inplace;
88 break;
89 case 2:
90 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91 break;
92 case 3:
93 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94 break;
95 case 4:
96 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
97 break;
98 case 5:
99 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
100 break;
101 case 6:
102 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
103 break;
104 case 7:
105 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
106 break;
107 default:
108 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_N_inplace;
109 break;
110 }
111 } else {
112 switch (inA->rmap->bs) {
113 case 1:
114 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_1_inplace;
115 break;
116 case 2:
117 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_2_inplace;
118 break;
119 case 3:
120 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_3_inplace;
121 break;
122 case 4:
123 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_4_inplace;
124 break;
125 case 5:
126 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_5_inplace;
127 break;
128 case 6:
129 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_6_inplace;
130 break;
131 case 7:
132 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_7_inplace;
133 break;
134 default:
135 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_N_inplace;
136 break;
137 }
138 }
139 PetscFunctionReturn(PETSC_SUCCESS);
140 }
141
142 /*
143 The symbolic factorization code is identical to that for AIJ format,
144 except for very small changes since this is now a SeqBAIJ datastructure.
145 NOT good code reuse.
146 */
147 #include <petscbt.h>
148 #include <../src/mat/utils/freespace.h>
149
MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)150 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
151 {
152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
153 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
154 PetscBool row_identity, col_identity, both_identity;
155 IS isicol;
156 const PetscInt *r, *ic;
157 PetscInt i, *ai = a->i, *aj = a->j;
158 PetscInt *bi, *bj, *ajtmp;
159 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
160 PetscReal f;
161 PetscInt nlnk, *lnk, k, **bi_ptr;
162 PetscFreeSpaceList free_space = NULL, current_space = NULL;
163 PetscBT lnkbt;
164 PetscBool diagDense;
165
166 PetscFunctionBegin;
167 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
168 PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, NULL, &diagDense));
169 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
170
171 if (bs > 1) { /* check shifttype */
172 PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
173 }
174
175 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
176 PetscCall(ISGetIndices(isrow, &r));
177 PetscCall(ISGetIndices(isicol, &ic));
178
179 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
180 PetscCall(PetscMalloc1(n + 1, &bi));
181 PetscCall(PetscMalloc1(n + 1, &bdiag));
182 bi[0] = bdiag[0] = 0;
183
184 /* linked list for storing column indices of the active row */
185 nlnk = n + 1;
186 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
187
188 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
189
190 /* initial FreeSpace size is f*(ai[n]+1) */
191 f = info->fill;
192 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
193
194 current_space = free_space;
195
196 for (i = 0; i < n; i++) {
197 /* copy previous fill into linked list */
198 nzi = 0;
199 nnz = ai[r[i] + 1] - ai[r[i]];
200 ajtmp = aj + ai[r[i]];
201 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
202 nzi += nlnk;
203
204 /* add pivot rows into linked list */
205 row = lnk[n];
206 while (row < i) {
207 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
208 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
209 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
210 nzi += nlnk;
211 row = lnk[row];
212 }
213 bi[i + 1] = bi[i] + nzi;
214 im[i] = nzi;
215
216 /* mark bdiag */
217 nzbd = 0;
218 nnz = nzi;
219 k = lnk[n];
220 while (nnz-- && k < i) {
221 nzbd++;
222 k = lnk[k];
223 }
224 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
225
226 /* if free space is not available, make more free space */
227 if (current_space->local_remaining < nzi) {
228 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
229 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
230 reallocs++;
231 }
232
233 /* copy data into free space, then initialize lnk */
234 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
235
236 bi_ptr[i] = current_space->array;
237 current_space->array += nzi;
238 current_space->local_used += nzi;
239 current_space->local_remaining -= nzi;
240 }
241
242 PetscCall(ISRestoreIndices(isrow, &r));
243 PetscCall(ISRestoreIndices(isicol, &ic));
244
245 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
246 PetscCall(PetscMalloc1(bi[n], &bj));
247 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
248 PetscCall(PetscLLDestroy(lnk, lnkbt));
249 PetscCall(PetscFree2(bi_ptr, im));
250
251 /* put together the new matrix */
252 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
253 b = (Mat_SeqBAIJ *)B->data;
254
255 b->free_ij = PETSC_TRUE;
256 PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
257 b->free_a = PETSC_TRUE;
258 b->j = bj;
259 b->i = bi;
260 b->diag = bdiag;
261 b->ilen = NULL;
262 b->imax = NULL;
263 b->row = isrow;
264 b->col = iscol;
265 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
266
267 PetscCall(PetscObjectReference((PetscObject)isrow));
268 PetscCall(PetscObjectReference((PetscObject)iscol));
269 b->icol = isicol;
270 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
271
272 b->maxnz = b->nz = bdiag[0] + 1;
273
274 B->factortype = MAT_FACTOR_LU;
275 B->info.factor_mallocs = reallocs;
276 B->info.fill_ratio_given = f;
277
278 if (ai[n] != 0) {
279 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
280 } else {
281 B->info.fill_ratio_needed = 0.0;
282 }
283 #if defined(PETSC_USE_INFO)
284 if (ai[n] != 0) {
285 PetscReal af = B->info.fill_ratio_needed;
286 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
287 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
288 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
289 PetscCall(PetscInfo(A, "for best performance.\n"));
290 } else {
291 PetscCall(PetscInfo(A, "Empty matrix\n"));
292 }
293 #endif
294
295 PetscCall(ISIdentity(isrow, &row_identity));
296 PetscCall(ISIdentity(iscol, &col_identity));
297
298 both_identity = (PetscBool)(row_identity && col_identity);
299
300 PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
301 PetscFunctionReturn(PETSC_SUCCESS);
302 }
303