xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision 174dc0c8cee294b82b85e4dd3b331b29396264fc)
1 /*
2     Factorization code for BAIJ format.
3 */
4 #include <../src/mat/impls/baij/seq/baij.h>
5 #include <petsc/private/kernels/blockinvert.h>
6 
7 /*
8    This is used to set the numeric factorization for both LU and ILU symbolic factorization
9 */
10 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11 {
12   PetscFunctionBegin;
13   if (natural) {
14     switch (fact->rmap->bs) {
15     case 1:
16       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17       break;
18     case 2:
19       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20       break;
21     case 3:
22       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23       break;
24     case 4:
25       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26       break;
27     case 5:
28       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29       break;
30     case 6:
31       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32       break;
33     case 7:
34       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35       break;
36     case 9:
37 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39 #else
40       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41 #endif
42       break;
43     case 15:
44       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45       break;
46     default:
47       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48       break;
49     }
50   } else {
51     switch (fact->rmap->bs) {
52     case 1:
53       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54       break;
55     case 2:
56       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57       break;
58     case 3:
59       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60       break;
61     case 4:
62       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63       break;
64     case 5:
65       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66       break;
67     case 6:
68       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69       break;
70     case 7:
71       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72       break;
73     default:
74       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75       break;
76     }
77   }
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82 {
83   PetscFunctionBegin;
84   if (natural) {
85     switch (inA->rmap->bs) {
86     case 1:
87       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
88       break;
89     case 2:
90       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91       break;
92     case 3:
93       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94       break;
95     case 4:
96       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
97       break;
98     case 5:
99       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
100       break;
101     case 6:
102       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
103       break;
104     case 7:
105       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
106       break;
107     default:
108       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
109       break;
110     }
111   } else {
112     switch (inA->rmap->bs) {
113     case 1:
114       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
115       break;
116     case 2:
117       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
118       break;
119     case 3:
120       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
121       break;
122     case 4:
123       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
124       break;
125     case 5:
126       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
127       break;
128     case 6:
129       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
130       break;
131     case 7:
132       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
133       break;
134     default:
135       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
136       break;
137     }
138   }
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 /*
143     The symbolic factorization code is identical to that for AIJ format,
144   except for very small changes since this is now a SeqBAIJ datastructure.
145   NOT good code reuse.
146 */
147 #include <petscbt.h>
148 #include <../src/mat/utils/freespace.h>
149 
150 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
151 {
152   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
153   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
154   PetscBool          row_identity, col_identity, both_identity;
155   IS                 isicol;
156   const PetscInt    *r, *ic;
157   PetscInt           i, *ai = a->i, *aj = a->j;
158   PetscInt          *bi, *bj, *ajtmp;
159   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
160   PetscReal          f;
161   PetscInt           nlnk, *lnk, k, **bi_ptr;
162   PetscFreeSpaceList free_space = NULL, current_space = NULL;
163   PetscBT            lnkbt;
164   PetscBool          missing;
165 
166   PetscFunctionBegin;
167   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
168   PetscCall(MatMissingDiagonal(A, &missing, &i));
169   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
170 
171   if (bs > 1) { /* check shifttype */
172     PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
173   }
174 
175   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
176   PetscCall(ISGetIndices(isrow, &r));
177   PetscCall(ISGetIndices(isicol, &ic));
178 
179   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
180   PetscCall(PetscMalloc1(n + 1, &bi));
181   PetscCall(PetscMalloc1(n + 1, &bdiag));
182   bi[0] = bdiag[0] = 0;
183 
184   /* linked list for storing column indices of the active row */
185   nlnk = n + 1;
186   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
187 
188   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
189 
190   /* initial FreeSpace size is f*(ai[n]+1) */
191   f = info->fill;
192   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
193 
194   current_space = free_space;
195 
196   for (i = 0; i < n; i++) {
197     /* copy previous fill into linked list */
198     nzi   = 0;
199     nnz   = ai[r[i] + 1] - ai[r[i]];
200     ajtmp = aj + ai[r[i]];
201     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
202     nzi += nlnk;
203 
204     /* add pivot rows into linked list */
205     row = lnk[n];
206     while (row < i) {
207       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
208       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
209       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
210       nzi += nlnk;
211       row = lnk[row];
212     }
213     bi[i + 1] = bi[i] + nzi;
214     im[i]     = nzi;
215 
216     /* mark bdiag */
217     nzbd = 0;
218     nnz  = nzi;
219     k    = lnk[n];
220     while (nnz-- && k < i) {
221       nzbd++;
222       k = lnk[k];
223     }
224     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
225 
226     /* if free space is not available, make more free space */
227     if (current_space->local_remaining < nzi) {
228       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
229       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
230       reallocs++;
231     }
232 
233     /* copy data into free space, then initialize lnk */
234     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
235 
236     bi_ptr[i] = current_space->array;
237     current_space->array += nzi;
238     current_space->local_used += nzi;
239     current_space->local_remaining -= nzi;
240   }
241 
242   PetscCall(ISRestoreIndices(isrow, &r));
243   PetscCall(ISRestoreIndices(isicol, &ic));
244 
245   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
246   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
247   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
248   PetscCall(PetscLLDestroy(lnk, lnkbt));
249   PetscCall(PetscFree2(bi_ptr, im));
250 
251   /* put together the new matrix */
252   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
253   b = (Mat_SeqBAIJ *)B->data;
254 
255   b->free_ij = PETSC_TRUE;
256   PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
257   b->free_a        = PETSC_TRUE;
258   b->j             = bj;
259   b->i             = bi;
260   b->diag          = bdiag;
261   b->free_diag     = PETSC_TRUE;
262   b->ilen          = NULL;
263   b->imax          = NULL;
264   b->row           = isrow;
265   b->col           = iscol;
266   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
267 
268   PetscCall(PetscObjectReference((PetscObject)isrow));
269   PetscCall(PetscObjectReference((PetscObject)iscol));
270   b->icol = isicol;
271   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
272 
273   b->maxnz = b->nz = bdiag[0] + 1;
274 
275   B->factortype            = MAT_FACTOR_LU;
276   B->info.factor_mallocs   = reallocs;
277   B->info.fill_ratio_given = f;
278 
279   if (ai[n] != 0) {
280     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
281   } else {
282     B->info.fill_ratio_needed = 0.0;
283   }
284 #if defined(PETSC_USE_INFO)
285   if (ai[n] != 0) {
286     PetscReal af = B->info.fill_ratio_needed;
287     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
288     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
289     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
290     PetscCall(PetscInfo(A, "for best performance.\n"));
291   } else {
292     PetscCall(PetscInfo(A, "Empty matrix\n"));
293   }
294 #endif
295 
296   PetscCall(ISIdentity(isrow, &row_identity));
297   PetscCall(ISIdentity(iscol, &col_identity));
298 
299   both_identity = (PetscBool)(row_identity && col_identity);
300 
301   PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
302   PetscFunctionReturn(PETSC_SUCCESS);
303 }
304 
305 #if 0
306 // unused
307 static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
308 {
309   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
310   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
311   PetscBool          row_identity, col_identity, both_identity;
312   IS                 isicol;
313   const PetscInt    *r, *ic;
314   PetscInt           i, *ai = a->i, *aj = a->j;
315   PetscInt          *bi, *bj, *ajtmp;
316   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
317   PetscReal          f;
318   PetscInt           nlnk, *lnk, k, **bi_ptr;
319   PetscFreeSpaceList free_space = NULL, current_space = NULL;
320   PetscBT            lnkbt;
321   PetscBool          missing;
322 
323   PetscFunctionBegin;
324   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
325   PetscCall(MatMissingDiagonal(A, &missing, &i));
326   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
327 
328   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
329   PetscCall(ISGetIndices(isrow, &r));
330   PetscCall(ISGetIndices(isicol, &ic));
331 
332   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
333   PetscCall(PetscMalloc1(n + 1, &bi));
334   PetscCall(PetscMalloc1(n + 1, &bdiag));
335 
336   bi[0] = bdiag[0] = 0;
337 
338   /* linked list for storing column indices of the active row */
339   nlnk = n + 1;
340   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
341 
342   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
343 
344   /* initial FreeSpace size is f*(ai[n]+1) */
345   f = info->fill;
346   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
347   current_space = free_space;
348 
349   for (i = 0; i < n; i++) {
350     /* copy previous fill into linked list */
351     nzi   = 0;
352     nnz   = ai[r[i] + 1] - ai[r[i]];
353     ajtmp = aj + ai[r[i]];
354     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
355     nzi += nlnk;
356 
357     /* add pivot rows into linked list */
358     row = lnk[n];
359     while (row < i) {
360       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
361       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
362       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
363       nzi += nlnk;
364       row = lnk[row];
365     }
366     bi[i + 1] = bi[i] + nzi;
367     im[i]     = nzi;
368 
369     /* mark bdiag */
370     nzbd = 0;
371     nnz  = nzi;
372     k    = lnk[n];
373     while (nnz-- && k < i) {
374       nzbd++;
375       k = lnk[k];
376     }
377     bdiag[i] = bi[i] + nzbd;
378 
379     /* if free space is not available, make more free space */
380     if (current_space->local_remaining < nzi) {
381       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
382       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
383       reallocs++;
384     }
385 
386     /* copy data into free space, then initialize lnk */
387     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
388 
389     bi_ptr[i] = current_space->array;
390     current_space->array += nzi;
391     current_space->local_used += nzi;
392     current_space->local_remaining -= nzi;
393   }
394   #if defined(PETSC_USE_INFO)
395   if (ai[n] != 0) {
396     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
397     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
398     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
399     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
400     PetscCall(PetscInfo(A, "for best performance.\n"));
401   } else {
402     PetscCall(PetscInfo(A, "Empty matrix\n"));
403   }
404   #endif
405 
406   PetscCall(ISRestoreIndices(isrow, &r));
407   PetscCall(ISRestoreIndices(isicol, &ic));
408 
409   /* destroy list of free space and other temporary array(s) */
410   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
411   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
412   PetscCall(PetscLLDestroy(lnk, lnkbt));
413   PetscCall(PetscFree2(bi_ptr, im));
414 
415   /* put together the new matrix */
416   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
417   b = (Mat_SeqBAIJ *)B->data;
418   b->free_ij      = PETSC_TRUE;
419   PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2,,sizeof(PetscScalar),(void **)&b->a));
420   b->free_a       = PETSC_TRUE;
421   b->j             = bj;
422   b->i             = bi;
423   b->diag          = bdiag;
424   b->free_diag     = PETSC_TRUE;
425   b->ilen          = NULL;
426   b->imax          = NULL;
427   b->row           = isrow;
428   b->col           = iscol;
429   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
430 
431   PetscCall(PetscObjectReference((PetscObject)isrow));
432   PetscCall(PetscObjectReference((PetscObject)iscol));
433   b->icol = isicol;
434 
435   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
436 
437   b->maxnz = b->nz = bi[n];
438 
439   B->factortype            = MAT_FACTOR_LU;
440   B->info.factor_mallocs   = reallocs;
441   B->info.fill_ratio_given = f;
442 
443   if (ai[n] != 0) {
444     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
445   } else {
446     B->info.fill_ratio_needed = 0.0;
447   }
448 
449   PetscCall(ISIdentity(isrow, &row_identity));
450   PetscCall(ISIdentity(iscol, &col_identity));
451 
452   both_identity = (PetscBool)(row_identity && col_identity);
453 
454   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
455   PetscFunctionReturn(PETSC_SUCCESS);
456 }
457 #endif
458