xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision 8d47944aebcd8a3a8a4a1aebd4b3f440b4d70fab)
1 
2 /*
3     Factorization code for BAIJ format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h>
6 #include <petsc/private/kernels/blockinvert.h>
7 
8 /*
9    This is used to set the numeric factorization for both LU and ILU symbolic factorization
10 */
11 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
12 {
13   PetscFunctionBegin;
14   if (natural) {
15     switch (fact->rmap->bs) {
16     case 1:
17       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
18       break;
19     case 2:
20       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
21       break;
22     case 3:
23       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
24       break;
25     case 4:
26       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
27       break;
28     case 5:
29       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
30       break;
31     case 6:
32       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
33       break;
34     case 7:
35       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
36       break;
37     case 9:
38 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
39       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
40 #else
41       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
42 #endif
43       break;
44     case 15:
45       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
46       break;
47     default:
48       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
49       break;
50     }
51   } else {
52     switch (fact->rmap->bs) {
53     case 1:
54       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
55       break;
56     case 2:
57       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
58       break;
59     case 3:
60       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
61       break;
62     case 4:
63       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
64       break;
65     case 5:
66       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
67       break;
68     case 6:
69       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
70       break;
71     case 7:
72       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
73       break;
74     default:
75       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
76       break;
77     }
78   }
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
83 {
84   PetscFunctionBegin;
85   if (natural) {
86     switch (inA->rmap->bs) {
87     case 1:
88       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
89       break;
90     case 2:
91       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
92       break;
93     case 3:
94       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
95       break;
96     case 4:
97 #if defined(PETSC_USE_REAL_MAT_SINGLE)
98     {
99       PetscBool sse_enabled_local;
100       PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL));
101       if (sse_enabled_local) {
102   #if defined(PETSC_HAVE_SSE)
103         int i, *AJ = a->j, nz = a->nz, n = a->mbs;
104         if (n == (unsigned short)n) {
105           unsigned short *aj = (unsigned short *)AJ;
106           for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];
107 
108           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
110 
111           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n"));
112         } else {
113           /* Scale the column indices for easier indexing in MatSolve. */
114           /*            for (i=0;i<nz;i++) { */
115           /*              AJ[i] = AJ[i]*4; */
116           /*            } */
117           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
118           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
119 
120           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n"));
121         }
122   #else
123         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
124         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
125   #endif
126       } else {
127         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
128       }
129     }
130 #else
131       inA->ops->lufactornumeric  = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
132 #endif
133     break;
134     case 5:
135       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
136       break;
137     case 6:
138       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
139       break;
140     case 7:
141       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
142       break;
143     default:
144       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
145       break;
146     }
147   } else {
148     switch (inA->rmap->bs) {
149     case 1:
150       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
151       break;
152     case 2:
153       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
154       break;
155     case 3:
156       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
157       break;
158     case 4:
159       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
160       break;
161     case 5:
162       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
163       break;
164     case 6:
165       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
166       break;
167     case 7:
168       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
169       break;
170     default:
171       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
172       break;
173     }
174   }
175   PetscFunctionReturn(PETSC_SUCCESS);
176 }
177 
178 /*
179     The symbolic factorization code is identical to that for AIJ format,
180   except for very small changes since this is now a SeqBAIJ datastructure.
181   NOT good code reuse.
182 */
183 #include <petscbt.h>
184 #include <../src/mat/utils/freespace.h>
185 
186 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
187 {
188   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
189   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
190   PetscBool          row_identity, col_identity, both_identity;
191   IS                 isicol;
192   const PetscInt    *r, *ic;
193   PetscInt           i, *ai = a->i, *aj = a->j;
194   PetscInt          *bi, *bj, *ajtmp;
195   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
196   PetscReal          f;
197   PetscInt           nlnk, *lnk, k, **bi_ptr;
198   PetscFreeSpaceList free_space = NULL, current_space = NULL;
199   PetscBT            lnkbt;
200   PetscBool          missing;
201 
202   PetscFunctionBegin;
203   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
204   PetscCall(MatMissingDiagonal(A, &missing, &i));
205   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
206 
207   if (bs > 1) { /* check shifttype */
208     PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
209   }
210 
211   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
212   PetscCall(ISGetIndices(isrow, &r));
213   PetscCall(ISGetIndices(isicol, &ic));
214 
215   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
216   PetscCall(PetscMalloc1(n + 1, &bi));
217   PetscCall(PetscMalloc1(n + 1, &bdiag));
218   bi[0] = bdiag[0] = 0;
219 
220   /* linked list for storing column indices of the active row */
221   nlnk = n + 1;
222   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
223 
224   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
225 
226   /* initial FreeSpace size is f*(ai[n]+1) */
227   f = info->fill;
228   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
229 
230   current_space = free_space;
231 
232   for (i = 0; i < n; i++) {
233     /* copy previous fill into linked list */
234     nzi   = 0;
235     nnz   = ai[r[i] + 1] - ai[r[i]];
236     ajtmp = aj + ai[r[i]];
237     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
238     nzi += nlnk;
239 
240     /* add pivot rows into linked list */
241     row = lnk[n];
242     while (row < i) {
243       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
244       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
245       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
246       nzi += nlnk;
247       row = lnk[row];
248     }
249     bi[i + 1] = bi[i] + nzi;
250     im[i]     = nzi;
251 
252     /* mark bdiag */
253     nzbd = 0;
254     nnz  = nzi;
255     k    = lnk[n];
256     while (nnz-- && k < i) {
257       nzbd++;
258       k = lnk[k];
259     }
260     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
261 
262     /* if free space is not available, make more free space */
263     if (current_space->local_remaining < nzi) {
264       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
265       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
266       reallocs++;
267     }
268 
269     /* copy data into free space, then initialize lnk */
270     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
271 
272     bi_ptr[i] = current_space->array;
273     current_space->array += nzi;
274     current_space->local_used += nzi;
275     current_space->local_remaining -= nzi;
276   }
277 
278   PetscCall(ISRestoreIndices(isrow, &r));
279   PetscCall(ISRestoreIndices(isicol, &ic));
280 
281   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
282   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
283   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
284   PetscCall(PetscLLDestroy(lnk, lnkbt));
285   PetscCall(PetscFree2(bi_ptr, im));
286 
287   /* put together the new matrix */
288   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
289   b = (Mat_SeqBAIJ *)(B)->data;
290 
291   b->free_a       = PETSC_TRUE;
292   b->free_ij      = PETSC_TRUE;
293   b->singlemalloc = PETSC_FALSE;
294 
295   PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a));
296   b->j             = bj;
297   b->i             = bi;
298   b->diag          = bdiag;
299   b->free_diag     = PETSC_TRUE;
300   b->ilen          = NULL;
301   b->imax          = NULL;
302   b->row           = isrow;
303   b->col           = iscol;
304   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
305 
306   PetscCall(PetscObjectReference((PetscObject)isrow));
307   PetscCall(PetscObjectReference((PetscObject)iscol));
308   b->icol = isicol;
309   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
310 
311   b->maxnz = b->nz = bdiag[0] + 1;
312 
313   B->factortype            = MAT_FACTOR_LU;
314   B->info.factor_mallocs   = reallocs;
315   B->info.fill_ratio_given = f;
316 
317   if (ai[n] != 0) {
318     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
319   } else {
320     B->info.fill_ratio_needed = 0.0;
321   }
322 #if defined(PETSC_USE_INFO)
323   if (ai[n] != 0) {
324     PetscReal af = B->info.fill_ratio_needed;
325     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
326     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
327     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
328     PetscCall(PetscInfo(A, "for best performance.\n"));
329   } else {
330     PetscCall(PetscInfo(A, "Empty matrix\n"));
331   }
332 #endif
333 
334   PetscCall(ISIdentity(isrow, &row_identity));
335   PetscCall(ISIdentity(iscol, &col_identity));
336 
337   both_identity = (PetscBool)(row_identity && col_identity);
338 
339   PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
340   PetscFunctionReturn(PETSC_SUCCESS);
341 }
342 
343 #if 0
344 // unused
345 static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
346 {
347   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
348   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
349   PetscBool          row_identity, col_identity, both_identity;
350   IS                 isicol;
351   const PetscInt    *r, *ic;
352   PetscInt           i, *ai = a->i, *aj = a->j;
353   PetscInt          *bi, *bj, *ajtmp;
354   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
355   PetscReal          f;
356   PetscInt           nlnk, *lnk, k, **bi_ptr;
357   PetscFreeSpaceList free_space = NULL, current_space = NULL;
358   PetscBT            lnkbt;
359   PetscBool          missing;
360 
361   PetscFunctionBegin;
362   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
363   PetscCall(MatMissingDiagonal(A, &missing, &i));
364   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
365 
366   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
367   PetscCall(ISGetIndices(isrow, &r));
368   PetscCall(ISGetIndices(isicol, &ic));
369 
370   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
371   PetscCall(PetscMalloc1(n + 1, &bi));
372   PetscCall(PetscMalloc1(n + 1, &bdiag));
373 
374   bi[0] = bdiag[0] = 0;
375 
376   /* linked list for storing column indices of the active row */
377   nlnk = n + 1;
378   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
379 
380   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
381 
382   /* initial FreeSpace size is f*(ai[n]+1) */
383   f = info->fill;
384   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
385   current_space = free_space;
386 
387   for (i = 0; i < n; i++) {
388     /* copy previous fill into linked list */
389     nzi   = 0;
390     nnz   = ai[r[i] + 1] - ai[r[i]];
391     ajtmp = aj + ai[r[i]];
392     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
393     nzi += nlnk;
394 
395     /* add pivot rows into linked list */
396     row = lnk[n];
397     while (row < i) {
398       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
399       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
400       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
401       nzi += nlnk;
402       row = lnk[row];
403     }
404     bi[i + 1] = bi[i] + nzi;
405     im[i]     = nzi;
406 
407     /* mark bdiag */
408     nzbd = 0;
409     nnz  = nzi;
410     k    = lnk[n];
411     while (nnz-- && k < i) {
412       nzbd++;
413       k = lnk[k];
414     }
415     bdiag[i] = bi[i] + nzbd;
416 
417     /* if free space is not available, make more free space */
418     if (current_space->local_remaining < nzi) {
419       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
420       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
421       reallocs++;
422     }
423 
424     /* copy data into free space, then initialize lnk */
425     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
426 
427     bi_ptr[i] = current_space->array;
428     current_space->array += nzi;
429     current_space->local_used += nzi;
430     current_space->local_remaining -= nzi;
431   }
432   #if defined(PETSC_USE_INFO)
433   if (ai[n] != 0) {
434     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
435     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
436     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
437     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
438     PetscCall(PetscInfo(A, "for best performance.\n"));
439   } else {
440     PetscCall(PetscInfo(A, "Empty matrix\n"));
441   }
442   #endif
443 
444   PetscCall(ISRestoreIndices(isrow, &r));
445   PetscCall(ISRestoreIndices(isicol, &ic));
446 
447   /* destroy list of free space and other temporary array(s) */
448   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
449   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
450   PetscCall(PetscLLDestroy(lnk, lnkbt));
451   PetscCall(PetscFree2(bi_ptr, im));
452 
453   /* put together the new matrix */
454   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
455   b = (Mat_SeqBAIJ *)(B)->data;
456 
457   b->free_a       = PETSC_TRUE;
458   b->free_ij      = PETSC_TRUE;
459   b->singlemalloc = PETSC_FALSE;
460 
461   PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a));
462   b->j             = bj;
463   b->i             = bi;
464   b->diag          = bdiag;
465   b->free_diag     = PETSC_TRUE;
466   b->ilen          = NULL;
467   b->imax          = NULL;
468   b->row           = isrow;
469   b->col           = iscol;
470   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
471 
472   PetscCall(PetscObjectReference((PetscObject)isrow));
473   PetscCall(PetscObjectReference((PetscObject)iscol));
474   b->icol = isicol;
475 
476   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
477 
478   b->maxnz = b->nz = bi[n];
479 
480   (B)->factortype            = MAT_FACTOR_LU;
481   (B)->info.factor_mallocs   = reallocs;
482   (B)->info.fill_ratio_given = f;
483 
484   if (ai[n] != 0) {
485     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
486   } else {
487     (B)->info.fill_ratio_needed = 0.0;
488   }
489 
490   PetscCall(ISIdentity(isrow, &row_identity));
491   PetscCall(ISIdentity(iscol, &col_identity));
492 
493   both_identity = (PetscBool)(row_identity && col_identity);
494 
495   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
496   PetscFunctionReturn(PETSC_SUCCESS);
497 }
498 #endif
499