xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sell/seq/sell.h>
4 #include <petsc/private/isimpl.h>
5 
6 /*
7     This routine is shared by SeqAIJ and SeqBAIJ matrices,
8     since it operators only on the nonzero structure of the elements or blocks.
9 */
10 PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) {
11   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
12   PetscBool isBAIJ, isSELL;
13 
14   PetscFunctionBegin;
15   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
16   PetscCall(MatGetBlockSize(mat, &bs));
17   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
18   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
19   if (isBAIJ) {
20     c->brows = m;
21     c->bcols = 1;
22   } else { /* seqaij matrix */
23     /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
24     PetscReal mem;
25     PetscInt  nz, brows, bcols;
26     if (isSELL) {
27       Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
28       nz               = spA->nz;
29     } else {
30       Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
31       nz              = spA->nz;
32     }
33 
34     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
35     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
36     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
37     brows = 1000 / bcols;
38     if (bcols > nis) bcols = nis;
39     if (brows == 0 || brows > m) brows = m;
40     c->brows = brows;
41     c->bcols = bcols;
42   }
43 
44   c->M       = mat->rmap->N / bs; /* set total rows, columns and local rows */
45   c->N       = mat->cmap->N / bs;
46   c->m       = mat->rmap->N / bs;
47   c->rstart  = 0;
48   c->ncolors = nis;
49   c->ctype   = iscoloring->ctype;
50   PetscFunctionReturn(0);
51 }
52 
53 /*
54  Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
55    Input Parameters:
56 +  mat - the matrix containing the nonzero structure of the Jacobian
57 .  color - the coloring context
58 -  nz - number of local non-zeros in mat
59 */
60 PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz) {
61   PetscInt  i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors;
62   PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end;
63 
64   PetscFunctionBegin;
65   if (brows < 1 || brows > mbs) brows = mbs;
66   PetscCall(PetscMalloc2(bcols + 1, &color_start, bcols, &row_start));
67   PetscCall(PetscCalloc1(nis, &nrows_new));
68   PetscCall(PetscMalloc1(bcols * mat->rmap->n, &c->dy));
69   PetscCall(PetscLogObjectMemory((PetscObject)c, bcols * mat->rmap->n * sizeof(PetscScalar)));
70 
71   nz_new             = 0;
72   nbcols             = 0;
73   color_start[bcols] = 0;
74 
75   if (c->htype[0] == 'd') { /* ----  c->htype == 'ds', use MatEntry --------*/
76     MatEntry *Jentry_new, *Jentry = c->matentry;
77 
78     PetscCall(PetscMalloc1(nz, &Jentry_new));
79     for (i = 0; i < nis; i += bcols) { /* loop over colors */
80       if (i + bcols > nis) {
81         color_start[nis - i] = color_start[bcols];
82         bcols                = nis - i;
83       }
84 
85       color_start[0] = color_start[bcols];
86       for (j = 0; j < bcols; j++) {
87         color_start[j + 1] = c->nrows[i + j] + color_start[j];
88         row_start[j]       = 0;
89       }
90 
91       row_end = brows;
92       if (row_end > mbs) row_end = mbs;
93 
94       while (row_end <= mbs) {        /* loop over block rows */
95         for (j = 0; j < bcols; j++) { /* loop over block columns */
96           nrows = c->nrows[i + j];
97           nz    = color_start[j];
98           while (row_start[j] < nrows) {
99             if (Jentry[nz].row >= row_end) {
100               color_start[j] = nz;
101               break;
102             } else {                                                 /* copy Jentry[nz] to Jentry_new[nz_new] */
103               Jentry_new[nz_new].row     = Jentry[nz].row + j * mbs; /* index in dy-array */
104               Jentry_new[nz_new].col     = Jentry[nz].col;
105               Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
106               nz_new++;
107               nz++;
108               row_start[j]++;
109             }
110           }
111         }
112         if (row_end == mbs) break;
113         row_end += brows;
114         if (row_end > mbs) row_end = mbs;
115       }
116       nrows_new[nbcols++] = nz_new;
117     }
118     PetscCall(PetscFree(Jentry));
119     c->matentry = Jentry_new;
120   } else { /* ---------  c->htype == 'wp', use MatEntry2 ------------------*/
121     MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2;
122 
123     PetscCall(PetscMalloc1(nz, &Jentry2_new));
124     for (i = 0; i < nis; i += bcols) { /* loop over colors */
125       if (i + bcols > nis) {
126         color_start[nis - i] = color_start[bcols];
127         bcols                = nis - i;
128       }
129 
130       color_start[0] = color_start[bcols];
131       for (j = 0; j < bcols; j++) {
132         color_start[j + 1] = c->nrows[i + j] + color_start[j];
133         row_start[j]       = 0;
134       }
135 
136       row_end = brows;
137       if (row_end > mbs) row_end = mbs;
138 
139       while (row_end <= mbs) {        /* loop over block rows */
140         for (j = 0; j < bcols; j++) { /* loop over block columns */
141           nrows = c->nrows[i + j];
142           nz    = color_start[j];
143           while (row_start[j] < nrows) {
144             if (Jentry2[nz].row >= row_end) {
145               color_start[j] = nz;
146               break;
147             } else {                                                   /* copy Jentry2[nz] to Jentry2_new[nz_new] */
148               Jentry2_new[nz_new].row     = Jentry2[nz].row + j * mbs; /* index in dy-array */
149               Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
150               nz_new++;
151               nz++;
152               row_start[j]++;
153             }
154           }
155         }
156         if (row_end == mbs) break;
157         row_end += brows;
158         if (row_end > mbs) row_end = mbs;
159       }
160       nrows_new[nbcols++] = nz_new;
161     }
162     PetscCall(PetscFree(Jentry2));
163     c->matentry2 = Jentry2_new;
164   } /* ---------------------------------------------*/
165 
166   PetscCall(PetscFree2(color_start, row_start));
167 
168   for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1];
169   PetscCall(PetscFree(c->nrows));
170   c->nrows = nrows_new;
171   PetscFunctionReturn(0);
172 }
173 
174 PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) {
175   PetscInt           i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp;
176   const PetscInt    *is, *row, *ci, *cj;
177   PetscBool          isBAIJ, isSELL;
178   const PetscScalar *A_val;
179   PetscScalar      **valaddrhit;
180   MatEntry          *Jentry;
181   MatEntry2         *Jentry2;
182 
183   PetscFunctionBegin;
184   PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));
185 
186   PetscCall(MatGetBlockSize(mat, &bs));
187   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
188   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
189   if (isBAIJ) {
190     Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data;
191 
192     A_val = spA->a;
193     nz    = spA->nz;
194   } else if (isSELL) {
195     Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
196 
197     A_val = spA->val;
198     nz    = spA->nz;
199     bs    = 1; /* only bs=1 is supported for SeqSELL matrix */
200   } else {
201     Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
202 
203     A_val = spA->a;
204     nz    = spA->nz;
205     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
206   }
207 
208   PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
209   PetscCall(PetscMalloc1(nis, &c->nrows)); /* nrows is freeed separately from ncolumns and columns */
210   PetscCall(PetscLogObjectMemory((PetscObject)c, 3 * nis * sizeof(PetscInt)));
211 
212   if (c->htype[0] == 'd') {
213     PetscCall(PetscMalloc1(nz, &Jentry));
214     PetscCall(PetscLogObjectMemory((PetscObject)c, nz * sizeof(MatEntry)));
215     c->matentry = Jentry;
216   } else if (c->htype[0] == 'w') {
217     PetscCall(PetscMalloc1(nz, &Jentry2));
218     PetscCall(PetscLogObjectMemory((PetscObject)c, nz * sizeof(MatEntry2)));
219     c->matentry2 = Jentry2;
220   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");
221 
222   if (isBAIJ) {
223     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
224   } else if (isSELL) {
225     PetscCall(MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
226   } else {
227     PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
228   }
229 
230   PetscCall(PetscCalloc1(c->m, &rowhit));
231   PetscCall(PetscMalloc1(c->m, &valaddrhit));
232 
233   nz = 0;
234   for (i = 0; i < nis; i++) { /* loop over colors */
235     PetscCall(ISGetLocalSize(c->isa[i], &n));
236     PetscCall(ISGetIndices(c->isa[i], &is));
237 
238     c->ncolumns[i] = n;
239     c->columns[i]  = (PetscInt *)is;
240     /* note: we know that c->isa is going to be around as long at the c->columns values */
241     PetscCall(ISRestoreIndices(c->isa[i], &is));
242 
243     /* fast, crude version requires O(N*N) work */
244     bs2   = bs * bs;
245     nrows = 0;
246     for (j = 0; j < n; j++) { /* loop over columns */
247       col = is[j];
248       tmp = ci[col];
249       row = cj + tmp;
250       m   = ci[col + 1] - tmp;
251       nrows += m;
252       for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */
253         rowhit[*row]       = col + 1;
254         valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]];
255       }
256     }
257     c->nrows[i] = nrows; /* total num of rows for this color */
258 
259     if (c->htype[0] == 'd') {
260       for (j = 0; j < mbs; j++) { /* loop over rows */
261         if (rowhit[j]) {
262           Jentry[nz].row     = j;             /* local row index */
263           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
264           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
265           nz++;
266           rowhit[j] = 0.0; /* zero rowhit for reuse */
267         }
268       }
269     } else {                      /* c->htype == 'wp' */
270       for (j = 0; j < mbs; j++) { /* loop over rows */
271         if (rowhit[j]) {
272           Jentry2[nz].row     = j;             /* local row index */
273           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
274           nz++;
275           rowhit[j] = 0.0; /* zero rowhit for reuse */
276         }
277       }
278     }
279   }
280 
281   if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
282     PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
283   }
284 
285   if (isBAIJ) {
286     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
287     PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
288     PetscCall(PetscLogObjectMemory((PetscObject)c, bs * mat->rmap->n * sizeof(PetscScalar)));
289   } else if (isSELL) {
290     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
291   } else {
292     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
293   }
294   PetscCall(PetscFree(rowhit));
295   PetscCall(PetscFree(valaddrhit));
296   PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
297 
298   PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale));
299   PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
300   PetscFunctionReturn(0);
301 }
302