xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 /*
2     Factorization code for BAIJ format.
3 */
4 #include <../src/mat/impls/baij/seq/baij.h>
5 #include <petsc/private/kernels/blockinvert.h>
6 
7 /*
8    This is used to set the numeric factorization for both LU and ILU symbolic factorization
9 */
10 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11 {
12   PetscFunctionBegin;
13   if (natural) {
14     switch (fact->rmap->bs) {
15     case 1:
16       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17       break;
18     case 2:
19       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20       break;
21     case 3:
22       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23       break;
24     case 4:
25       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26       break;
27     case 5:
28       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29       break;
30     case 6:
31       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32       break;
33     case 7:
34       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35       break;
36     case 9:
37 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39 #else
40       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41 #endif
42       break;
43     case 15:
44       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45       break;
46     default:
47       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48       break;
49     }
50   } else {
51     switch (fact->rmap->bs) {
52     case 1:
53       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54       break;
55     case 2:
56       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57       break;
58     case 3:
59       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60       break;
61     case 4:
62       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63       break;
64     case 5:
65       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66       break;
67     case 6:
68       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69       break;
70     case 7:
71       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72       break;
73     default:
74       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75       break;
76     }
77   }
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82 {
83   PetscFunctionBegin;
84   if (natural) {
85     switch (inA->rmap->bs) {
86     case 1:
87       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
88       break;
89     case 2:
90       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91       break;
92     case 3:
93       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94       break;
95     case 4:
96 #if defined(PETSC_USE_REAL_MAT_SINGLE)
97     {
98       PetscBool sse_enabled_local;
99       PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL));
100       if (sse_enabled_local) {
101   #if defined(PETSC_HAVE_SSE)
102         int i, *AJ = a->j, nz = a->nz, n = a->mbs;
103         if (n == (unsigned short)n) {
104           unsigned short *aj = (unsigned short *)AJ;
105           for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];
106 
107           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
108           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109 
110           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n"));
111         } else {
112           /* Scale the column indices for easier indexing in MatSolve. */
113           /*            for (i=0;i<nz;i++) { */
114           /*              AJ[i] = AJ[i]*4; */
115           /*            } */
116           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
117           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
118 
119           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n"));
120         }
121   #else
122         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
123         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
124   #endif
125       } else {
126         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
127       }
128     }
129 #else
130       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
131 #endif
132     break;
133     case 5:
134       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
135       break;
136     case 6:
137       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
138       break;
139     case 7:
140       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
141       break;
142     default:
143       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
144       break;
145     }
146   } else {
147     switch (inA->rmap->bs) {
148     case 1:
149       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
150       break;
151     case 2:
152       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
153       break;
154     case 3:
155       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
156       break;
157     case 4:
158       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
159       break;
160     case 5:
161       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
162       break;
163     case 6:
164       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
165       break;
166     case 7:
167       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
168       break;
169     default:
170       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
171       break;
172     }
173   }
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 /*
178     The symbolic factorization code is identical to that for AIJ format,
179   except for very small changes since this is now a SeqBAIJ datastructure.
180   NOT good code reuse.
181 */
182 #include <petscbt.h>
183 #include <../src/mat/utils/freespace.h>
184 
185 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
186 {
187   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
188   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
189   PetscBool          row_identity, col_identity, both_identity;
190   IS                 isicol;
191   const PetscInt    *r, *ic;
192   PetscInt           i, *ai = a->i, *aj = a->j;
193   PetscInt          *bi, *bj, *ajtmp;
194   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
195   PetscReal          f;
196   PetscInt           nlnk, *lnk, k, **bi_ptr;
197   PetscFreeSpaceList free_space = NULL, current_space = NULL;
198   PetscBT            lnkbt;
199   PetscBool          missing;
200 
201   PetscFunctionBegin;
202   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
203   PetscCall(MatMissingDiagonal(A, &missing, &i));
204   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
205 
206   if (bs > 1) { /* check shifttype */
207     PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
208   }
209 
210   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
211   PetscCall(ISGetIndices(isrow, &r));
212   PetscCall(ISGetIndices(isicol, &ic));
213 
214   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
215   PetscCall(PetscMalloc1(n + 1, &bi));
216   PetscCall(PetscMalloc1(n + 1, &bdiag));
217   bi[0] = bdiag[0] = 0;
218 
219   /* linked list for storing column indices of the active row */
220   nlnk = n + 1;
221   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
222 
223   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
224 
225   /* initial FreeSpace size is f*(ai[n]+1) */
226   f = info->fill;
227   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
228 
229   current_space = free_space;
230 
231   for (i = 0; i < n; i++) {
232     /* copy previous fill into linked list */
233     nzi   = 0;
234     nnz   = ai[r[i] + 1] - ai[r[i]];
235     ajtmp = aj + ai[r[i]];
236     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
237     nzi += nlnk;
238 
239     /* add pivot rows into linked list */
240     row = lnk[n];
241     while (row < i) {
242       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
243       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
244       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
245       nzi += nlnk;
246       row = lnk[row];
247     }
248     bi[i + 1] = bi[i] + nzi;
249     im[i]     = nzi;
250 
251     /* mark bdiag */
252     nzbd = 0;
253     nnz  = nzi;
254     k    = lnk[n];
255     while (nnz-- && k < i) {
256       nzbd++;
257       k = lnk[k];
258     }
259     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
260 
261     /* if free space is not available, make more free space */
262     if (current_space->local_remaining < nzi) {
263       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
264       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
265       reallocs++;
266     }
267 
268     /* copy data into free space, then initialize lnk */
269     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
270 
271     bi_ptr[i] = current_space->array;
272     current_space->array += nzi;
273     current_space->local_used += nzi;
274     current_space->local_remaining -= nzi;
275   }
276 
277   PetscCall(ISRestoreIndices(isrow, &r));
278   PetscCall(ISRestoreIndices(isicol, &ic));
279 
280   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
281   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
282   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
283   PetscCall(PetscLLDestroy(lnk, lnkbt));
284   PetscCall(PetscFree2(bi_ptr, im));
285 
286   /* put together the new matrix */
287   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
288   b = (Mat_SeqBAIJ *)(B)->data;
289 
290   b->free_a       = PETSC_TRUE;
291   b->free_ij      = PETSC_TRUE;
292   b->singlemalloc = PETSC_FALSE;
293 
294   PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a));
295   b->j             = bj;
296   b->i             = bi;
297   b->diag          = bdiag;
298   b->free_diag     = PETSC_TRUE;
299   b->ilen          = NULL;
300   b->imax          = NULL;
301   b->row           = isrow;
302   b->col           = iscol;
303   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
304 
305   PetscCall(PetscObjectReference((PetscObject)isrow));
306   PetscCall(PetscObjectReference((PetscObject)iscol));
307   b->icol = isicol;
308   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
309 
310   b->maxnz = b->nz = bdiag[0] + 1;
311 
312   B->factortype            = MAT_FACTOR_LU;
313   B->info.factor_mallocs   = reallocs;
314   B->info.fill_ratio_given = f;
315 
316   if (ai[n] != 0) {
317     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
318   } else {
319     B->info.fill_ratio_needed = 0.0;
320   }
321 #if defined(PETSC_USE_INFO)
322   if (ai[n] != 0) {
323     PetscReal af = B->info.fill_ratio_needed;
324     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
325     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
326     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
327     PetscCall(PetscInfo(A, "for best performance.\n"));
328   } else {
329     PetscCall(PetscInfo(A, "Empty matrix\n"));
330   }
331 #endif
332 
333   PetscCall(ISIdentity(isrow, &row_identity));
334   PetscCall(ISIdentity(iscol, &col_identity));
335 
336   both_identity = (PetscBool)(row_identity && col_identity);
337 
338   PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
339   PetscFunctionReturn(PETSC_SUCCESS);
340 }
341 
342 #if 0
343 // unused
344 static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
345 {
346   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
347   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
348   PetscBool          row_identity, col_identity, both_identity;
349   IS                 isicol;
350   const PetscInt    *r, *ic;
351   PetscInt           i, *ai = a->i, *aj = a->j;
352   PetscInt          *bi, *bj, *ajtmp;
353   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
354   PetscReal          f;
355   PetscInt           nlnk, *lnk, k, **bi_ptr;
356   PetscFreeSpaceList free_space = NULL, current_space = NULL;
357   PetscBT            lnkbt;
358   PetscBool          missing;
359 
360   PetscFunctionBegin;
361   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
362   PetscCall(MatMissingDiagonal(A, &missing, &i));
363   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
364 
365   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
366   PetscCall(ISGetIndices(isrow, &r));
367   PetscCall(ISGetIndices(isicol, &ic));
368 
369   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
370   PetscCall(PetscMalloc1(n + 1, &bi));
371   PetscCall(PetscMalloc1(n + 1, &bdiag));
372 
373   bi[0] = bdiag[0] = 0;
374 
375   /* linked list for storing column indices of the active row */
376   nlnk = n + 1;
377   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
378 
379   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
380 
381   /* initial FreeSpace size is f*(ai[n]+1) */
382   f = info->fill;
383   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
384   current_space = free_space;
385 
386   for (i = 0; i < n; i++) {
387     /* copy previous fill into linked list */
388     nzi   = 0;
389     nnz   = ai[r[i] + 1] - ai[r[i]];
390     ajtmp = aj + ai[r[i]];
391     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
392     nzi += nlnk;
393 
394     /* add pivot rows into linked list */
395     row = lnk[n];
396     while (row < i) {
397       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
398       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
399       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
400       nzi += nlnk;
401       row = lnk[row];
402     }
403     bi[i + 1] = bi[i] + nzi;
404     im[i]     = nzi;
405 
406     /* mark bdiag */
407     nzbd = 0;
408     nnz  = nzi;
409     k    = lnk[n];
410     while (nnz-- && k < i) {
411       nzbd++;
412       k = lnk[k];
413     }
414     bdiag[i] = bi[i] + nzbd;
415 
416     /* if free space is not available, make more free space */
417     if (current_space->local_remaining < nzi) {
418       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
419       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
420       reallocs++;
421     }
422 
423     /* copy data into free space, then initialize lnk */
424     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
425 
426     bi_ptr[i] = current_space->array;
427     current_space->array += nzi;
428     current_space->local_used += nzi;
429     current_space->local_remaining -= nzi;
430   }
431   #if defined(PETSC_USE_INFO)
432   if (ai[n] != 0) {
433     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
434     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
435     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
436     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
437     PetscCall(PetscInfo(A, "for best performance.\n"));
438   } else {
439     PetscCall(PetscInfo(A, "Empty matrix\n"));
440   }
441   #endif
442 
443   PetscCall(ISRestoreIndices(isrow, &r));
444   PetscCall(ISRestoreIndices(isicol, &ic));
445 
446   /* destroy list of free space and other temporary array(s) */
447   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
448   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
449   PetscCall(PetscLLDestroy(lnk, lnkbt));
450   PetscCall(PetscFree2(bi_ptr, im));
451 
452   /* put together the new matrix */
453   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
454   b = (Mat_SeqBAIJ *)(B)->data;
455 
456   b->free_a       = PETSC_TRUE;
457   b->free_ij      = PETSC_TRUE;
458   b->singlemalloc = PETSC_FALSE;
459 
460   PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a));
461   b->j             = bj;
462   b->i             = bi;
463   b->diag          = bdiag;
464   b->free_diag     = PETSC_TRUE;
465   b->ilen          = NULL;
466   b->imax          = NULL;
467   b->row           = isrow;
468   b->col           = iscol;
469   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
470 
471   PetscCall(PetscObjectReference((PetscObject)isrow));
472   PetscCall(PetscObjectReference((PetscObject)iscol));
473   b->icol = isicol;
474 
475   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
476 
477   b->maxnz = b->nz = bi[n];
478 
479   (B)->factortype            = MAT_FACTOR_LU;
480   (B)->info.factor_mallocs   = reallocs;
481   (B)->info.fill_ratio_given = f;
482 
483   if (ai[n] != 0) {
484     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
485   } else {
486     (B)->info.fill_ratio_needed = 0.0;
487   }
488 
489   PetscCall(ISIdentity(isrow, &row_identity));
490   PetscCall(ISIdentity(iscol, &col_identity));
491 
492   both_identity = (PetscBool)(row_identity && col_identity);
493 
494   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
495   PetscFunctionReturn(PETSC_SUCCESS);
496 }
497 #endif
498