xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)
1 
2 /*
3   Defines projective product routines where A is a SeqAIJ matrix
4           C = R * A * R^T
5 */
6 
7 #include <../src/mat/impls/aij/seq/aij.h>
8 #include <../src/mat/utils/freespace.h>
9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
10 
11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) {
12   Mat_RARt *rart = (Mat_RARt *)data;
13 
14   PetscFunctionBegin;
15   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
16   PetscCall(MatDestroy(&rart->Rt));
17   PetscCall(MatDestroy(&rart->RARt));
18   PetscCall(MatDestroy(&rart->ARt));
19   PetscCall(PetscFree(rart->work));
20   if (rart->destroy) PetscCall((*rart->destroy)(rart->data));
21   PetscCall(PetscFree(rart));
22   PetscFunctionReturn(0);
23 }
24 
25 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C) {
26   Mat                  P;
27   Mat_RARt            *rart;
28   MatColoring          coloring;
29   MatTransposeColoring matcoloring;
30   ISColoring           iscoloring;
31   Mat                  Rt_dense, RARt_dense;
32 
33   PetscFunctionBegin;
34   MatCheckProduct(C, 4);
35   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
36   /* create symbolic P=Rt */
37   PetscCall(MatTransposeSymbolic(R, &P));
38 
39   /* get symbolic C=Pt*A*P */
40   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
41   PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs)));
42   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
43 
44   /* create a supporting struct */
45   PetscCall(PetscNew(&rart));
46   C->product->data    = rart;
47   C->product->destroy = MatDestroy_SeqAIJ_RARt;
48 
49   /* ------ Use coloring ---------- */
50   /* inode causes memory problem */
51   PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
52 
53   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
54   PetscCall(MatColoringCreate(C, &coloring));
55   PetscCall(MatColoringSetDistance(coloring, 2));
56   PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
57   PetscCall(MatColoringSetFromOptions(coloring));
58   PetscCall(MatColoringApply(coloring, &iscoloring));
59   PetscCall(MatColoringDestroy(&coloring));
60   PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
61 
62   rart->matcoloring = matcoloring;
63   PetscCall(ISColoringDestroy(&iscoloring));
64 
65   /* Create Rt_dense */
66   PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
67   PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
68   PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
69   PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
70 
71   Rt_dense->assembled = PETSC_TRUE;
72   rart->Rt            = Rt_dense;
73 
74   /* Create RARt_dense = R*A*Rt_dense */
75   PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
76   PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
77   PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
78   PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
79 
80   rart->RARt = RARt_dense;
81 
82   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
83   PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
84 
85   /* clean up */
86   PetscCall(MatDestroy(&P));
87 
88 #if defined(PETSC_USE_INFO)
89   {
90     Mat_SeqAIJ *c       = (Mat_SeqAIJ *)C->data;
91     PetscReal   density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n);
92     PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
93     PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
94   }
95 #endif
96   PetscFunctionReturn(0);
97 }
98 
99 /*
100  RAB = R * A * B, R and A in seqaij format, B in dense format;
101 */
102 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work) {
103   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
104   PetscScalar        r1, r2, r3, r4;
105   const PetscScalar *b, *b1, *b2, *b3, *b4;
106   MatScalar         *aa, *ra;
107   PetscInt           cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
108   PetscInt           am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
109   PetscScalar       *d, *c, *c2, *c3, *c4;
110   PetscInt          *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
111   PetscInt           rm2 = 2 * rm, rm3 = 3 * rm, colrm;
112 
113   PetscFunctionBegin;
114   if (!dm || !dn) PetscFunctionReturn(0);
115   PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
116   PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
117   PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
118   PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
119 
120   { /*
121      This approach is not as good as original ones (will be removed later), but it reveals that
122      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
123      */
124     PetscBool via_matmatmult = PETSC_FALSE;
125     PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
126     if (via_matmatmult) {
127       Mat AB_den = NULL;
128       PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
129       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
130       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
131       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
132       PetscCall(MatDestroy(&AB_den));
133       PetscFunctionReturn(0);
134     }
135   }
136 
137   PetscCall(MatDenseGetArrayRead(B, &b));
138   PetscCall(MatDenseGetArray(RAB, &d));
139   b1 = b;
140   b2 = b1 + bm;
141   b3 = b2 + bm;
142   b4 = b3 + bm;
143   c  = work;
144   c2 = c + am;
145   c3 = c2 + am;
146   c4 = c3 + am;
147   for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
148     for (i = 0; i < am; i++) {            /* over rows of A in those columns */
149       r1 = r2 = r3 = r4 = 0.0;
150       n                 = ai[i + 1] - ai[i];
151       aj                = a->j + ai[i];
152       aa                = a->a + ai[i];
153       for (j = 0; j < n; j++) {
154         r1 += (*aa) * b1[*aj];
155         r2 += (*aa) * b2[*aj];
156         r3 += (*aa) * b3[*aj];
157         r4 += (*aa++) * b4[*aj++];
158       }
159       c[i]       = r1;
160       c[am + i]  = r2;
161       c[am2 + i] = r3;
162       c[am3 + i] = r4;
163     }
164     b1 += bm4;
165     b2 += bm4;
166     b3 += bm4;
167     b4 += bm4;
168 
169     /* RAB[:,col] = R*C[:,col] */
170     colrm = col * rm;
171     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
172       r1 = r2 = r3 = r4 = 0.0;
173       n                 = r->i[i + 1] - r->i[i];
174       rj                = r->j + r->i[i];
175       ra                = r->a + r->i[i];
176       for (j = 0; j < n; j++) {
177         r1 += (*ra) * c[*rj];
178         r2 += (*ra) * c2[*rj];
179         r3 += (*ra) * c3[*rj];
180         r4 += (*ra++) * c4[*rj++];
181       }
182       d[colrm + i]       = r1;
183       d[colrm + rm + i]  = r2;
184       d[colrm + rm2 + i] = r3;
185       d[colrm + rm3 + i] = r4;
186     }
187   }
188   for (; col < cn; col++) {    /* over extra columns of C */
189     for (i = 0; i < am; i++) { /* over rows of A in those columns */
190       r1 = 0.0;
191       n  = a->i[i + 1] - a->i[i];
192       aj = a->j + a->i[i];
193       aa = a->a + a->i[i];
194       for (j = 0; j < n; j++) { r1 += (*aa++) * b1[*aj++]; }
195       c[i] = r1;
196     }
197     b1 += bm;
198 
199     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
200       r1 = 0.0;
201       n  = r->i[i + 1] - r->i[i];
202       rj = r->j + r->i[i];
203       ra = r->a + r->i[i];
204       for (j = 0; j < n; j++) { r1 += (*ra++) * c[*rj++]; }
205       d[col * rm + i] = r1;
206     }
207   }
208   PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
209 
210   PetscCall(MatDenseRestoreArrayRead(B, &b));
211   PetscCall(MatDenseRestoreArray(RAB, &d));
212   PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
213   PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
214   PetscFunctionReturn(0);
215 }
216 
217 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C) {
218   Mat_RARt            *rart;
219   MatTransposeColoring matcoloring;
220   Mat                  Rt, RARt;
221 
222   PetscFunctionBegin;
223   MatCheckProduct(C, 3);
224   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
225   rart = (Mat_RARt *)C->product->data;
226 
227   /* Get dense Rt by Apply MatTransposeColoring to R */
228   matcoloring = rart->matcoloring;
229   Rt          = rart->Rt;
230   PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
231 
232   /* Get dense RARt = R*A*Rt -- dominates! */
233   RARt = rart->RARt;
234   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
235 
236   /* Recover C from C_dense */
237   PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
238   PetscFunctionReturn(0);
239 }
240 
241 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C) {
242   Mat       ARt;
243   Mat_RARt *rart;
244   char     *alg;
245 
246   PetscFunctionBegin;
247   MatCheckProduct(C, 4);
248   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
249   /* create symbolic ARt = A*R^T  */
250   PetscCall(MatProductCreate(A, R, NULL, &ARt));
251   PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
252   PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
253   PetscCall(MatProductSetFill(ARt, fill));
254   PetscCall(MatProductSetFromOptions(ARt));
255   PetscCall(MatProductSymbolic(ARt));
256 
257   /* compute symbolic C = R*ARt */
258   /* set algorithm for C = R*ARt */
259   PetscCall(PetscStrallocpy(C->product->alg, &alg));
260   PetscCall(MatProductSetAlgorithm(C, "sorted"));
261   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
262   /* resume original algorithm for C */
263   PetscCall(MatProductSetAlgorithm(C, alg));
264   PetscCall(PetscFree(alg));
265 
266   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
267 
268   PetscCall(PetscNew(&rart));
269   rart->ARt           = ARt;
270   C->product->data    = rart;
271   C->product->destroy = MatDestroy_SeqAIJ_RARt;
272   PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
273   PetscFunctionReturn(0);
274 }
275 
276 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C) {
277   Mat_RARt *rart;
278 
279   PetscFunctionBegin;
280   MatCheckProduct(C, 3);
281   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
282   rart = (Mat_RARt *)C->product->data;
283   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
284   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C) {
289   Mat       Rt;
290   Mat_RARt *rart;
291 
292   PetscFunctionBegin;
293   MatCheckProduct(C, 4);
294   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
295   PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
296   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
297 
298   PetscCall(PetscNew(&rart));
299   rart->data          = C->product->data;
300   rart->destroy       = C->product->destroy;
301   rart->Rt            = Rt;
302   C->product->data    = rart;
303   C->product->destroy = MatDestroy_SeqAIJ_RARt;
304   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
305   PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
306   PetscFunctionReturn(0);
307 }
308 
309 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C) {
310   Mat_RARt *rart;
311 
312   PetscFunctionBegin;
313   MatCheckProduct(C, 3);
314   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
315   rart = (Mat_RARt *)C->product->data;
316   PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
317   /* MatMatMatMultSymbolic used a different data */
318   C->product->data = rart->data;
319   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
320   C->product->data = rart;
321   PetscFunctionReturn(0);
322 }
323 
324 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C) {
325   const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
326   PetscInt    alg         = 0; /* set default algorithm */
327 
328   PetscFunctionBegin;
329   if (scall == MAT_INITIAL_MATRIX) {
330     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
331     PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
332     PetscOptionsEnd();
333 
334     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
335     PetscCall(MatCreate(PETSC_COMM_SELF, C));
336     switch (alg) {
337     case 1:
338       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
339       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
340       break;
341     case 2:
342       /* via coloring_rart: apply coloring C = R*A*R^T                          */
343       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
344       break;
345     default:
346       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
347       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
348       break;
349     }
350     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
351   }
352 
353   PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
354   PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
355   PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
356   PetscFunctionReturn(0);
357 }
358 
359 /* ------------------------------------------------------------- */
360 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) {
361   Mat_Product        *product = C->product;
362   Mat                 A = product->A, R = product->B;
363   MatProductAlgorithm alg  = product->alg;
364   PetscReal           fill = product->fill;
365   PetscBool           flg;
366 
367   PetscFunctionBegin;
368   PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
369   if (flg) {
370     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
371     goto next;
372   }
373 
374   PetscCall(PetscStrcmp(alg, "r*art", &flg));
375   if (flg) {
376     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
377     goto next;
378   }
379 
380   PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
381   if (flg) {
382     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
383     goto next;
384   }
385 
386   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
387 
388 next:
389   C->ops->productnumeric = MatProductNumeric_RARt;
390   PetscFunctionReturn(0);
391 }
392