xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/impls/sell/seq/sell.h>
4 #include <petsc/private/isimpl.h>
5 
6 /*
7     This routine is shared by SeqAIJ and SeqBAIJ matrices,
8     since it operators only on the nonzero structure of the elements or blocks.
9 */
10 PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) {
11   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
12   PetscBool isBAIJ, isSELL;
13 
14   PetscFunctionBegin;
15   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
16   PetscCall(MatGetBlockSize(mat, &bs));
17   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
18   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
19   if (isBAIJ) {
20     c->brows = m;
21     c->bcols = 1;
22   } else { /* seqaij matrix */
23     /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
24     PetscReal mem;
25     PetscInt  nz, brows, bcols;
26     if (isSELL) {
27       Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
28       nz               = spA->nz;
29     } else {
30       Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
31       nz              = spA->nz;
32     }
33 
34     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
35     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
36     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
37     brows = 1000 / bcols;
38     if (bcols > nis) bcols = nis;
39     if (brows == 0 || brows > m) brows = m;
40     c->brows = brows;
41     c->bcols = bcols;
42   }
43 
44   c->M       = mat->rmap->N / bs; /* set total rows, columns and local rows */
45   c->N       = mat->cmap->N / bs;
46   c->m       = mat->rmap->N / bs;
47   c->rstart  = 0;
48   c->ncolors = nis;
49   c->ctype   = iscoloring->ctype;
50   PetscFunctionReturn(0);
51 }
52 
53 /*
54  Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
55    Input Parameters:
56 +  mat - the matrix containing the nonzero structure of the Jacobian
57 .  color - the coloring context
58 -  nz - number of local non-zeros in mat
59 */
60 PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz) {
61   PetscInt  i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors;
62   PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end;
63 
64   PetscFunctionBegin;
65   if (brows < 1 || brows > mbs) brows = mbs;
66   PetscCall(PetscMalloc2(bcols + 1, &color_start, bcols, &row_start));
67   PetscCall(PetscCalloc1(nis, &nrows_new));
68   PetscCall(PetscMalloc1(bcols * mat->rmap->n, &c->dy));
69 
70   nz_new             = 0;
71   nbcols             = 0;
72   color_start[bcols] = 0;
73 
74   if (c->htype[0] == 'd') { /* ----  c->htype == 'ds', use MatEntry --------*/
75     MatEntry *Jentry_new, *Jentry = c->matentry;
76 
77     PetscCall(PetscMalloc1(nz, &Jentry_new));
78     for (i = 0; i < nis; i += bcols) { /* loop over colors */
79       if (i + bcols > nis) {
80         color_start[nis - i] = color_start[bcols];
81         bcols                = nis - i;
82       }
83 
84       color_start[0] = color_start[bcols];
85       for (j = 0; j < bcols; j++) {
86         color_start[j + 1] = c->nrows[i + j] + color_start[j];
87         row_start[j]       = 0;
88       }
89 
90       row_end = brows;
91       if (row_end > mbs) row_end = mbs;
92 
93       while (row_end <= mbs) {        /* loop over block rows */
94         for (j = 0; j < bcols; j++) { /* loop over block columns */
95           nrows = c->nrows[i + j];
96           nz    = color_start[j];
97           while (row_start[j] < nrows) {
98             if (Jentry[nz].row >= row_end) {
99               color_start[j] = nz;
100               break;
101             } else {                                                 /* copy Jentry[nz] to Jentry_new[nz_new] */
102               Jentry_new[nz_new].row     = Jentry[nz].row + j * mbs; /* index in dy-array */
103               Jentry_new[nz_new].col     = Jentry[nz].col;
104               Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
105               nz_new++;
106               nz++;
107               row_start[j]++;
108             }
109           }
110         }
111         if (row_end == mbs) break;
112         row_end += brows;
113         if (row_end > mbs) row_end = mbs;
114       }
115       nrows_new[nbcols++] = nz_new;
116     }
117     PetscCall(PetscFree(Jentry));
118     c->matentry = Jentry_new;
119   } else { /* ---------  c->htype == 'wp', use MatEntry2 ------------------*/
120     MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2;
121 
122     PetscCall(PetscMalloc1(nz, &Jentry2_new));
123     for (i = 0; i < nis; i += bcols) { /* loop over colors */
124       if (i + bcols > nis) {
125         color_start[nis - i] = color_start[bcols];
126         bcols                = nis - i;
127       }
128 
129       color_start[0] = color_start[bcols];
130       for (j = 0; j < bcols; j++) {
131         color_start[j + 1] = c->nrows[i + j] + color_start[j];
132         row_start[j]       = 0;
133       }
134 
135       row_end = brows;
136       if (row_end > mbs) row_end = mbs;
137 
138       while (row_end <= mbs) {        /* loop over block rows */
139         for (j = 0; j < bcols; j++) { /* loop over block columns */
140           nrows = c->nrows[i + j];
141           nz    = color_start[j];
142           while (row_start[j] < nrows) {
143             if (Jentry2[nz].row >= row_end) {
144               color_start[j] = nz;
145               break;
146             } else {                                                   /* copy Jentry2[nz] to Jentry2_new[nz_new] */
147               Jentry2_new[nz_new].row     = Jentry2[nz].row + j * mbs; /* index in dy-array */
148               Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
149               nz_new++;
150               nz++;
151               row_start[j]++;
152             }
153           }
154         }
155         if (row_end == mbs) break;
156         row_end += brows;
157         if (row_end > mbs) row_end = mbs;
158       }
159       nrows_new[nbcols++] = nz_new;
160     }
161     PetscCall(PetscFree(Jentry2));
162     c->matentry2 = Jentry2_new;
163   } /* ---------------------------------------------*/
164 
165   PetscCall(PetscFree2(color_start, row_start));
166 
167   for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1];
168   PetscCall(PetscFree(c->nrows));
169   c->nrows = nrows_new;
170   PetscFunctionReturn(0);
171 }
172 
173 PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) {
174   PetscInt           i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp;
175   const PetscInt    *is, *row, *ci, *cj;
176   PetscBool          isBAIJ, isSELL;
177   const PetscScalar *A_val;
178   PetscScalar      **valaddrhit;
179   MatEntry          *Jentry;
180   MatEntry2         *Jentry2;
181 
182   PetscFunctionBegin;
183   PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));
184 
185   PetscCall(MatGetBlockSize(mat, &bs));
186   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
187   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
188   if (isBAIJ) {
189     Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data;
190 
191     A_val = spA->a;
192     nz    = spA->nz;
193   } else if (isSELL) {
194     Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
195 
196     A_val = spA->val;
197     nz    = spA->nz;
198     bs    = 1; /* only bs=1 is supported for SeqSELL matrix */
199   } else {
200     Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
201 
202     A_val = spA->a;
203     nz    = spA->nz;
204     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
205   }
206 
207   PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
208   PetscCall(PetscMalloc1(nis, &c->nrows)); /* nrows is freeed separately from ncolumns and columns */
209 
210   if (c->htype[0] == 'd') {
211     PetscCall(PetscMalloc1(nz, &Jentry));
212     c->matentry = Jentry;
213   } else if (c->htype[0] == 'w') {
214     PetscCall(PetscMalloc1(nz, &Jentry2));
215     c->matentry2 = Jentry2;
216   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");
217 
218   if (isBAIJ) {
219     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
220   } else if (isSELL) {
221     PetscCall(MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
222   } else {
223     PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
224   }
225 
226   PetscCall(PetscCalloc1(c->m, &rowhit));
227   PetscCall(PetscMalloc1(c->m, &valaddrhit));
228 
229   nz = 0;
230   for (i = 0; i < nis; i++) { /* loop over colors */
231     PetscCall(ISGetLocalSize(c->isa[i], &n));
232     PetscCall(ISGetIndices(c->isa[i], &is));
233 
234     c->ncolumns[i] = n;
235     c->columns[i]  = (PetscInt *)is;
236     /* note: we know that c->isa is going to be around as long at the c->columns values */
237     PetscCall(ISRestoreIndices(c->isa[i], &is));
238 
239     /* fast, crude version requires O(N*N) work */
240     bs2   = bs * bs;
241     nrows = 0;
242     for (j = 0; j < n; j++) { /* loop over columns */
243       col = is[j];
244       tmp = ci[col];
245       row = cj + tmp;
246       m   = ci[col + 1] - tmp;
247       nrows += m;
248       for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */
249         rowhit[*row]       = col + 1;
250         valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]];
251       }
252     }
253     c->nrows[i] = nrows; /* total num of rows for this color */
254 
255     if (c->htype[0] == 'd') {
256       for (j = 0; j < mbs; j++) { /* loop over rows */
257         if (rowhit[j]) {
258           Jentry[nz].row     = j;             /* local row index */
259           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
260           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
261           nz++;
262           rowhit[j] = 0.0; /* zero rowhit for reuse */
263         }
264       }
265     } else {                      /* c->htype == 'wp' */
266       for (j = 0; j < mbs; j++) { /* loop over rows */
267         if (rowhit[j]) {
268           Jentry2[nz].row     = j;             /* local row index */
269           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
270           nz++;
271           rowhit[j] = 0.0; /* zero rowhit for reuse */
272         }
273       }
274     }
275   }
276 
277   if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
278     PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
279   }
280 
281   if (isBAIJ) {
282     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
283     PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
284   } else if (isSELL) {
285     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
286   } else {
287     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
288   }
289   PetscCall(PetscFree(rowhit));
290   PetscCall(PetscFree(valaddrhit));
291   PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
292 
293   PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale));
294   PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
295   PetscFunctionReturn(0);
296 }
297