xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 609bdbee21ea3be08735c64dbe00a9ab27759925)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/baij/seq/baij.h>
4 #include <petsc/private/isimpl.h>
5 
6 /*
7     This routine is shared by SeqAIJ and SeqBAIJ matrices,
8     since it operators only on the nonzero structure of the elements or blocks.
9 */
10 PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
11 {
12   PetscErrorCode ierr;
13   PetscInt       bs,nis=iscoloring->n,m=mat->rmap->n;
14   PetscBool      isBAIJ;
15 
16   PetscFunctionBegin;
17   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
18   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
19   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
20   if (isBAIJ) {
21     c->brows = m;
22     c->bcols = 1;
23   } else { /* seqaij matrix */
24     /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
25     Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data;
26     PetscReal  mem;
27     PetscInt   nz,brows,bcols;
28 
29     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
30 
31     nz    = spA->nz;
32     mem   = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt);
33     bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar)));
34     brows = 1000/bcols;
35     if (bcols > nis) bcols = nis;
36     if (brows == 0 || brows > m) brows = m;
37     c->brows = brows;
38     c->bcols = bcols;
39   }
40 
41   c->M       = mat->rmap->N/bs;   /* set total rows, columns and local rows */
42   c->N       = mat->cmap->N/bs;
43   c->m       = mat->rmap->N/bs;
44   c->rstart  = 0;
45   c->ncolors = nis;
46   c->ctype   = iscoloring->ctype;
47   PetscFunctionReturn(0);
48 }
49 
50 /*
51  Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
52    Input Parameters:
53 +  mat - the matrix containing the nonzero structure of the Jacobian
54 .  color - the coloring context
55 -  nz - number of local non-zeros in mat
56 */
57 PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat,MatFDColoring c,PetscInt nz)
58 {
59   PetscErrorCode ierr;
60   PetscInt       i,j,nrows,nbcols,brows=c->brows,bcols=c->bcols,mbs=c->m,nis=c->ncolors;
61   PetscInt       *color_start,*row_start,*nrows_new,nz_new,row_end;
62 
63   PetscFunctionBegin;
64   if (brows < 1 || brows > mbs) brows = mbs;
65   ierr = PetscMalloc2(bcols+1,&color_start,bcols,&row_start);CHKERRQ(ierr);
66   ierr = PetscCalloc1(nis,&nrows_new);CHKERRQ(ierr);
67   ierr = PetscMalloc1(bcols*mat->rmap->n,&c->dy);CHKERRQ(ierr);
68   ierr = PetscLogObjectMemory((PetscObject)c,bcols*mat->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
69 
70   nz_new = 0;
71   nbcols = 0;
72   color_start[bcols] = 0;
73 
74   if (c->htype[0] == 'd') { /* ----  c->htype == 'ds', use MatEntry --------*/
75     MatEntry       *Jentry_new,*Jentry=c->matentry;
76     ierr = PetscMalloc1(nz,&Jentry_new);CHKERRQ(ierr);
77     for (i=0; i<nis; i+=bcols) { /* loop over colors */
78       if (i + bcols > nis) {
79         color_start[nis - i] = color_start[bcols];
80         bcols                = nis - i;
81       }
82 
83       color_start[0] = color_start[bcols];
84       for (j=0; j<bcols; j++) {
85         color_start[j+1] = c->nrows[i+j] + color_start[j];
86         row_start[j]     = 0;
87       }
88 
89       row_end = brows;
90       if (row_end > mbs) row_end = mbs;
91 
92       while (row_end <= mbs) {   /* loop over block rows */
93         for (j=0; j<bcols; j++) {       /* loop over block columns */
94           nrows = c->nrows[i+j];
95           nz    = color_start[j];
96           while (row_start[j] < nrows) {
97             if (Jentry[nz].row >= row_end) {
98               color_start[j] = nz;
99               break;
100             } else { /* copy Jentry[nz] to Jentry_new[nz_new] */
101               Jentry_new[nz_new].row     = Jentry[nz].row + j*mbs; /* index in dy-array */
102               Jentry_new[nz_new].col     = Jentry[nz].col;
103               Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
104               nz_new++; nz++; row_start[j]++;
105             }
106           }
107         }
108         if (row_end == mbs) break;
109         row_end += brows;
110         if (row_end > mbs) row_end = mbs;
111       }
112       nrows_new[nbcols++] = nz_new;
113     }
114     ierr = PetscFree(Jentry);CHKERRQ(ierr);
115     c->matentry = Jentry_new;
116   } else { /* ---------  c->htype == 'wp', use MatEntry2 ------------------*/
117     MatEntry2      *Jentry2_new,*Jentry2=c->matentry2;
118     ierr = PetscMalloc1(nz,&Jentry2_new);CHKERRQ(ierr);
119     for (i=0; i<nis; i+=bcols) { /* loop over colors */
120       if (i + bcols > nis) {
121         color_start[nis - i] = color_start[bcols];
122         bcols                = nis - i;
123       }
124 
125       color_start[0] = color_start[bcols];
126       for (j=0; j<bcols; j++) {
127         color_start[j+1] = c->nrows[i+j] + color_start[j];
128         row_start[j]     = 0;
129       }
130 
131       row_end = brows;
132       if (row_end > mbs) row_end = mbs;
133 
134       while (row_end <= mbs) {   /* loop over block rows */
135         for (j=0; j<bcols; j++) {       /* loop over block columns */
136           nrows = c->nrows[i+j];
137           nz    = color_start[j];
138           while (row_start[j] < nrows) {
139             if (Jentry2[nz].row >= row_end) {
140               color_start[j] = nz;
141               break;
142             } else { /* copy Jentry2[nz] to Jentry2_new[nz_new] */
143               Jentry2_new[nz_new].row     = Jentry2[nz].row + j*mbs; /* index in dy-array */
144               Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
145               nz_new++; nz++; row_start[j]++;
146             }
147           }
148         }
149         if (row_end == mbs) break;
150         row_end += brows;
151         if (row_end > mbs) row_end = mbs;
152       }
153       nrows_new[nbcols++] = nz_new;
154     }
155     ierr = PetscFree(Jentry2);CHKERRQ(ierr);
156     c->matentry2 = Jentry2_new;
157   } /* ---------------------------------------------*/
158 
159   ierr = PetscFree2(color_start,row_start);CHKERRQ(ierr);
160 
161   for (i=nbcols-1; i>0; i--) nrows_new[i] -= nrows_new[i-1];
162   ierr = PetscFree(c->nrows);CHKERRQ(ierr);
163   c->nrows = nrows_new;
164   PetscFunctionReturn(0);
165 }
166 
167 PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
168 {
169   PetscErrorCode ierr;
170   PetscInt       i,n,nrows,mbs=c->m,j,k,m,ncols,col,nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz;
171   const PetscInt *is,*row,*ci,*cj;
172   IS             *isa;
173   PetscBool      isBAIJ;
174   PetscScalar    *A_val,**valaddrhit;
175   MatEntry       *Jentry;
176   MatEntry2      *Jentry2;
177 
178   PetscFunctionBegin;
179   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
180 
181   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
182   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);
183   if (isBAIJ) {
184     Mat_SeqBAIJ *spA = (Mat_SeqBAIJ*)mat->data;
185     A_val = spA->a;
186     nz    = spA->nz;
187   } else {
188     Mat_SeqAIJ  *spA = (Mat_SeqAIJ*)mat->data;
189     A_val = spA->a;
190     nz    = spA->nz;
191     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
192   }
193 
194   ierr       = PetscMalloc1(nis,&c->ncolumns);CHKERRQ(ierr);
195   ierr       = PetscMalloc1(nis,&c->columns);CHKERRQ(ierr);
196   ierr       = PetscMalloc1(nis,&c->nrows);CHKERRQ(ierr);
197   ierr       = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr);
198 
199   if (c->htype[0] == 'd') {
200     ierr       = PetscMalloc1(nz,&Jentry);CHKERRQ(ierr);
201     ierr       = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr);
202     c->matentry = Jentry;
203   } else if (c->htype[0] == 'w') {
204     ierr       = PetscMalloc1(nz,&Jentry2);CHKERRQ(ierr);
205     ierr       = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));CHKERRQ(ierr);
206     c->matentry2 = Jentry2;
207   } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported");
208 
209   if (isBAIJ) {
210     ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
211   } else {
212     ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
213   }
214 
215   ierr = PetscMalloc2(c->m,&rowhit,c->m,&valaddrhit);CHKERRQ(ierr);
216   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
217 
218   nz = 0;
219   for (i=0; i<nis; i++) { /* loop over colors */
220     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
221     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
222 
223     c->ncolumns[i] = n;
224     if (n) {
225       ierr = PetscMalloc1(n,&c->columns[i]);CHKERRQ(ierr);
226       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
228     } else {
229       c->columns[i] = 0;
230     }
231 
232     /* fast, crude version requires O(N*N) work */
233     bs2   = bs*bs;
234     nrows = 0;
235     for (j=0; j<n; j++) {  /* loop over columns */
236       col    = is[j];
237       row    = cj + ci[col];
238       m      = ci[col+1] - ci[col];
239       nrows += m;
240       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
241         rowhit[*row]       = col + 1;
242         valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]];
243       }
244     }
245     c->nrows[i] = nrows; /* total num of rows for this color */
246 
247     if (c->htype[0] == 'd') {
248       for (j=0; j<mbs; j++) { /* loop over rows */
249         if (rowhit[j]) {
250           Jentry[nz].row     = j;              /* local row index */
251           Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
252           Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
253           nz++;
254           rowhit[j] = 0.0;                     /* zero rowhit for reuse */
255         }
256       }
257     }  else { /* c->htype == 'wp' */
258       for (j=0; j<mbs; j++) { /* loop over rows */
259         if (rowhit[j]) {
260           Jentry2[nz].row     = j;              /* local row index */
261           Jentry2[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
262           nz++;
263           rowhit[j] = 0.0;                     /* zero rowhit for reuse */
264         }
265       }
266     }
267     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
268   }
269 
270   if (c->bcols > 1) {  /* reorder Jentry for faster MatFDColoringApply() */
271     ierr = MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);CHKERRQ(ierr);
272   }
273 
274   if (isBAIJ) {
275     ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
276     ierr = PetscMalloc1(bs*mat->rmap->n,&c->dy);CHKERRQ(ierr);
277     ierr = PetscLogObjectMemory((PetscObject)c,bs*mat->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
278   } else {
279     ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
280   }
281   ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr);
282   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
283 
284   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
285   ierr = PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288