xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3     Factorization code for BAIJ format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h>
6 #include <petsc/private/kernels/blockinvert.h>
7 
8 /*
9    This is used to set the numeric factorization for both LU and ILU symbolic factorization
10 */
11 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) {
12   PetscFunctionBegin;
13   if (natural) {
14     switch (fact->rmap->bs) {
15     case 1: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; break;
16     case 2: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; break;
17     case 3: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; break;
18     case 4: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; break;
19     case 5: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; break;
20     case 6: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; break;
21     case 7: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; break;
22     case 9:
23 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
24       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
25 #else
26       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
27 #endif
28       break;
29     case 15: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; break;
30     default: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; break;
31     }
32   } else {
33     switch (fact->rmap->bs) {
34     case 1: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; break;
35     case 2: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; break;
36     case 3: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; break;
37     case 4: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; break;
38     case 5: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; break;
39     case 6: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; break;
40     case 7: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; break;
41     default: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; break;
42     }
43   }
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) {
48   PetscFunctionBegin;
49   if (natural) {
50     switch (inA->rmap->bs) {
51     case 1: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; break;
52     case 2: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; break;
53     case 3: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; break;
54     case 4:
55 #if defined(PETSC_USE_REAL_MAT_SINGLE)
56     {
57       PetscBool sse_enabled_local;
58       PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL));
59       if (sse_enabled_local) {
60 #if defined(PETSC_HAVE_SSE)
61         int i, *AJ = a->j, nz = a->nz, n = a->mbs;
62         if (n == (unsigned short)n) {
63           unsigned short *aj = (unsigned short *)AJ;
64           for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];
65 
66           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
67           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
68 
69           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n"));
70         } else {
71           /* Scale the column indices for easier indexing in MatSolve. */
72           /*            for (i=0;i<nz;i++) { */
73           /*              AJ[i] = AJ[i]*4; */
74           /*            } */
75           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
76           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
77 
78           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n"));
79         }
80 #else
81         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
82         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
83 #endif
84       } else {
85         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
86       }
87     }
88 #else
89       inA->ops->lufactornumeric  = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
90 #endif
91     break;
92     case 5: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; break;
93     case 6: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; break;
94     case 7: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; break;
95     default: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; break;
96     }
97   } else {
98     switch (inA->rmap->bs) {
99     case 1: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; break;
100     case 2: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; break;
101     case 3: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; break;
102     case 4: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; break;
103     case 5: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; break;
104     case 6: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; break;
105     case 7: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; break;
106     default: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; break;
107     }
108   }
109   PetscFunctionReturn(0);
110 }
111 
112 /*
113     The symbolic factorization code is identical to that for AIJ format,
114   except for very small changes since this is now a SeqBAIJ datastructure.
115   NOT good code reuse.
116 */
117 #include <petscbt.h>
118 #include <../src/mat/utils/freespace.h>
119 
120 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
121   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
122   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
123   PetscBool          row_identity, col_identity, both_identity;
124   IS                 isicol;
125   const PetscInt    *r, *ic;
126   PetscInt           i, *ai = a->i, *aj = a->j;
127   PetscInt          *bi, *bj, *ajtmp;
128   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
129   PetscReal          f;
130   PetscInt           nlnk, *lnk, k, **bi_ptr;
131   PetscFreeSpaceList free_space = NULL, current_space = NULL;
132   PetscBT            lnkbt;
133   PetscBool          missing;
134 
135   PetscFunctionBegin;
136   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
137   PetscCall(MatMissingDiagonal(A, &missing, &i));
138   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
139 
140   if (bs > 1) { /* check shifttype */
141     PetscCheck(info->shifttype != MAT_SHIFT_NONZERO && info->shifttype != MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
142   }
143 
144   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
145   PetscCall(ISGetIndices(isrow, &r));
146   PetscCall(ISGetIndices(isicol, &ic));
147 
148   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
149   PetscCall(PetscMalloc1(n + 1, &bi));
150   PetscCall(PetscMalloc1(n + 1, &bdiag));
151   bi[0] = bdiag[0] = 0;
152 
153   /* linked list for storing column indices of the active row */
154   nlnk = n + 1;
155   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
156 
157   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
158 
159   /* initial FreeSpace size is f*(ai[n]+1) */
160   f = info->fill;
161   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
162 
163   current_space = free_space;
164 
165   for (i = 0; i < n; i++) {
166     /* copy previous fill into linked list */
167     nzi   = 0;
168     nnz   = ai[r[i] + 1] - ai[r[i]];
169     ajtmp = aj + ai[r[i]];
170     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
171     nzi += nlnk;
172 
173     /* add pivot rows into linked list */
174     row = lnk[n];
175     while (row < i) {
176       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
177       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
178       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
179       nzi += nlnk;
180       row = lnk[row];
181     }
182     bi[i + 1] = bi[i] + nzi;
183     im[i]     = nzi;
184 
185     /* mark bdiag */
186     nzbd = 0;
187     nnz  = nzi;
188     k    = lnk[n];
189     while (nnz-- && k < i) {
190       nzbd++;
191       k = lnk[k];
192     }
193     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
194 
195     /* if free space is not available, make more free space */
196     if (current_space->local_remaining < nzi) {
197       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
198       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
199       reallocs++;
200     }
201 
202     /* copy data into free space, then initialize lnk */
203     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
204 
205     bi_ptr[i] = current_space->array;
206     current_space->array += nzi;
207     current_space->local_used += nzi;
208     current_space->local_remaining -= nzi;
209   }
210 
211   PetscCall(ISRestoreIndices(isrow, &r));
212   PetscCall(ISRestoreIndices(isicol, &ic));
213 
214   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
215   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
216   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
217   PetscCall(PetscLLDestroy(lnk, lnkbt));
218   PetscCall(PetscFree2(bi_ptr, im));
219 
220   /* put together the new matrix */
221   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
222   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
223   b = (Mat_SeqBAIJ *)(B)->data;
224 
225   b->free_a       = PETSC_TRUE;
226   b->free_ij      = PETSC_TRUE;
227   b->singlemalloc = PETSC_FALSE;
228 
229   PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a));
230   b->j             = bj;
231   b->i             = bi;
232   b->diag          = bdiag;
233   b->free_diag     = PETSC_TRUE;
234   b->ilen          = NULL;
235   b->imax          = NULL;
236   b->row           = isrow;
237   b->col           = iscol;
238   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
239 
240   PetscCall(PetscObjectReference((PetscObject)isrow));
241   PetscCall(PetscObjectReference((PetscObject)iscol));
242   b->icol = isicol;
243   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
244   PetscCall(PetscLogObjectMemory((PetscObject)B, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar) * bs2)));
245 
246   b->maxnz = b->nz = bdiag[0] + 1;
247 
248   B->factortype            = MAT_FACTOR_LU;
249   B->info.factor_mallocs   = reallocs;
250   B->info.fill_ratio_given = f;
251 
252   if (ai[n] != 0) {
253     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
254   } else {
255     B->info.fill_ratio_needed = 0.0;
256   }
257 #if defined(PETSC_USE_INFO)
258   if (ai[n] != 0) {
259     PetscReal af = B->info.fill_ratio_needed;
260     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
261     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
262     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
263     PetscCall(PetscInfo(A, "for best performance.\n"));
264   } else {
265     PetscCall(PetscInfo(A, "Empty matrix\n"));
266   }
267 #endif
268 
269   PetscCall(ISIdentity(isrow, &row_identity));
270   PetscCall(ISIdentity(iscol, &col_identity));
271 
272   both_identity = (PetscBool)(row_identity && col_identity);
273 
274   PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
275   PetscFunctionReturn(0);
276 }
277 
278 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) {
279   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
280   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
281   PetscBool          row_identity, col_identity, both_identity;
282   IS                 isicol;
283   const PetscInt    *r, *ic;
284   PetscInt           i, *ai = a->i, *aj = a->j;
285   PetscInt          *bi, *bj, *ajtmp;
286   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
287   PetscReal          f;
288   PetscInt           nlnk, *lnk, k, **bi_ptr;
289   PetscFreeSpaceList free_space = NULL, current_space = NULL;
290   PetscBT            lnkbt;
291   PetscBool          missing;
292 
293   PetscFunctionBegin;
294   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
295   PetscCall(MatMissingDiagonal(A, &missing, &i));
296   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
297 
298   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
299   PetscCall(ISGetIndices(isrow, &r));
300   PetscCall(ISGetIndices(isicol, &ic));
301 
302   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
303   PetscCall(PetscMalloc1(n + 1, &bi));
304   PetscCall(PetscMalloc1(n + 1, &bdiag));
305 
306   bi[0] = bdiag[0] = 0;
307 
308   /* linked list for storing column indices of the active row */
309   nlnk = n + 1;
310   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
311 
312   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
313 
314   /* initial FreeSpace size is f*(ai[n]+1) */
315   f = info->fill;
316   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
317   current_space = free_space;
318 
319   for (i = 0; i < n; i++) {
320     /* copy previous fill into linked list */
321     nzi   = 0;
322     nnz   = ai[r[i] + 1] - ai[r[i]];
323     ajtmp = aj + ai[r[i]];
324     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
325     nzi += nlnk;
326 
327     /* add pivot rows into linked list */
328     row = lnk[n];
329     while (row < i) {
330       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
331       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
332       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
333       nzi += nlnk;
334       row = lnk[row];
335     }
336     bi[i + 1] = bi[i] + nzi;
337     im[i]     = nzi;
338 
339     /* mark bdiag */
340     nzbd = 0;
341     nnz  = nzi;
342     k    = lnk[n];
343     while (nnz-- && k < i) {
344       nzbd++;
345       k = lnk[k];
346     }
347     bdiag[i] = bi[i] + nzbd;
348 
349     /* if free space is not available, make more free space */
350     if (current_space->local_remaining < nzi) {
351       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
352       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
353       reallocs++;
354     }
355 
356     /* copy data into free space, then initialize lnk */
357     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
358 
359     bi_ptr[i] = current_space->array;
360     current_space->array += nzi;
361     current_space->local_used += nzi;
362     current_space->local_remaining -= nzi;
363   }
364 #if defined(PETSC_USE_INFO)
365   if (ai[n] != 0) {
366     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
367     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
368     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
369     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
370     PetscCall(PetscInfo(A, "for best performance.\n"));
371   } else {
372     PetscCall(PetscInfo(A, "Empty matrix\n"));
373   }
374 #endif
375 
376   PetscCall(ISRestoreIndices(isrow, &r));
377   PetscCall(ISRestoreIndices(isicol, &ic));
378 
379   /* destroy list of free space and other temporary array(s) */
380   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
381   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
382   PetscCall(PetscLLDestroy(lnk, lnkbt));
383   PetscCall(PetscFree2(bi_ptr, im));
384 
385   /* put together the new matrix */
386   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
387   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol));
388   b = (Mat_SeqBAIJ *)(B)->data;
389 
390   b->free_a       = PETSC_TRUE;
391   b->free_ij      = PETSC_TRUE;
392   b->singlemalloc = PETSC_FALSE;
393 
394   PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a));
395   b->j             = bj;
396   b->i             = bi;
397   b->diag          = bdiag;
398   b->free_diag     = PETSC_TRUE;
399   b->ilen          = NULL;
400   b->imax          = NULL;
401   b->row           = isrow;
402   b->col           = iscol;
403   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
404 
405   PetscCall(PetscObjectReference((PetscObject)isrow));
406   PetscCall(PetscObjectReference((PetscObject)iscol));
407   b->icol = isicol;
408 
409   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
410   PetscCall(PetscLogObjectMemory((PetscObject)B, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar) * bs2)));
411 
412   b->maxnz = b->nz = bi[n];
413 
414   (B)->factortype            = MAT_FACTOR_LU;
415   (B)->info.factor_mallocs   = reallocs;
416   (B)->info.fill_ratio_given = f;
417 
418   if (ai[n] != 0) {
419     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
420   } else {
421     (B)->info.fill_ratio_needed = 0.0;
422   }
423 
424   PetscCall(ISIdentity(isrow, &row_identity));
425   PetscCall(ISIdentity(iscol, &col_identity));
426 
427   both_identity = (PetscBool)(row_identity && col_identity);
428 
429   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
430   PetscFunctionReturn(0);
431 }
432