xref: /petsc/src/mat/impls/aij/seq/matrart.c (revision 4e8208cbcbc709572b8abe32f33c78b69c819375)
1 /*
2   Defines projective product routines where A is a SeqAIJ matrix
3           C = R * A * R^T
4 */
5 
6 #include <../src/mat/impls/aij/seq/aij.h>
7 #include <../src/mat/utils/freespace.h>
8 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
9 
MatProductCtxDestroy_SeqAIJ_RARt(PetscCtxRt data)10 static PetscErrorCode MatProductCtxDestroy_SeqAIJ_RARt(PetscCtxRt data)
11 {
12   MatProductCtx_RARt *rart = *(MatProductCtx_RARt **)data;
13 
14   PetscFunctionBegin;
15   PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
16   PetscCall(MatDestroy(&rart->Rt));
17   PetscCall(MatDestroy(&rart->RARt));
18   PetscCall(MatDestroy(&rart->ARt));
19   PetscCall(PetscFree(rart->work));
20   if (rart->destroy) PetscCall((*rart->destroy)(&rart->data));
21   PetscCall(PetscFree(rart));
22   PetscFunctionReturn(PETSC_SUCCESS);
23 }
24 
MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C)25 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
26 {
27   Mat                  P;
28   MatProductCtx_RARt  *rart;
29   MatColoring          coloring;
30   MatTransposeColoring matcoloring;
31   ISColoring           iscoloring;
32   Mat                  Rt_dense, RARt_dense;
33 
34   PetscFunctionBegin;
35   MatCheckProduct(C, 4);
36   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
37   /* create symbolic P=Rt */
38   PetscCall(MatTransposeSymbolic(R, &P));
39 
40   /* get symbolic C=Pt*A*P */
41   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
42   PetscCall(MatSetBlockSizes(C, R->rmap->bs, R->rmap->bs));
43   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
44 
45   /* create a supporting struct */
46   PetscCall(PetscNew(&rart));
47   C->product->data    = rart;
48   C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
49 
50   /* Use coloring  */
51   /* inode causes memory problem */
52   PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
53 
54   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
55   PetscCall(MatColoringCreate(C, &coloring));
56   PetscCall(MatColoringSetDistance(coloring, 2));
57   PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
58   PetscCall(MatColoringSetFromOptions(coloring));
59   PetscCall(MatColoringApply(coloring, &iscoloring));
60   PetscCall(MatColoringDestroy(&coloring));
61   PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
62 
63   rart->matcoloring = matcoloring;
64   PetscCall(ISColoringDestroy(&iscoloring));
65 
66   /* Create Rt_dense */
67   PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
68   PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
69   PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
70   PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
71 
72   Rt_dense->assembled = PETSC_TRUE;
73   rart->Rt            = Rt_dense;
74 
75   /* Create RARt_dense = R*A*Rt_dense */
76   PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
77   PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
78   PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
79   PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
80 
81   rart->RARt = RARt_dense;
82 
83   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
84   PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
85 
86   /* clean up */
87   PetscCall(MatDestroy(&P));
88 
89 #if defined(PETSC_USE_INFO)
90   {
91     Mat_SeqAIJ *c       = (Mat_SeqAIJ *)C->data;
92     PetscReal   density = (PetscReal)c->nz / (RARt_dense->rmap->n * RARt_dense->cmap->n);
93     PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
94     PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
95   }
96 #endif
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 /*
101  RAB = R * A * B, R and A in seqaij format, B in dense format;
102 */
MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar * work)103 static PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
104 {
105   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
106   PetscScalar        r1, r2, r3, r4;
107   const PetscScalar *b, *b1, *b2, *b3, *b4;
108   MatScalar         *aa, *ra;
109   PetscInt           cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
110   PetscInt           am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
111   PetscScalar       *d, *c, *c2, *c3, *c4;
112   PetscInt          *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
113   PetscInt           rm2 = 2 * rm, rm3 = 3 * rm, colrm;
114 
115   PetscFunctionBegin;
116   if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS);
117   PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
118   PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
119   PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
120   PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
121 
122   { /*
123      This approach is not as good as original ones (will be removed later), but it reveals that
124      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
125      */
126     PetscBool via_matmatmult = PETSC_FALSE;
127     PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
128     if (via_matmatmult) {
129       Mat AB_den = NULL;
130       PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
131       PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
132       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
133       PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
134       PetscCall(MatDestroy(&AB_den));
135       PetscFunctionReturn(PETSC_SUCCESS);
136     }
137   }
138 
139   PetscCall(MatDenseGetArrayRead(B, &b));
140   PetscCall(MatDenseGetArray(RAB, &d));
141   b1 = b;
142   b2 = b1 + bm;
143   b3 = b2 + bm;
144   b4 = b3 + bm;
145   c  = work;
146   c2 = c + am;
147   c3 = c2 + am;
148   c4 = c3 + am;
149   for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
150     for (i = 0; i < am; i++) {            /* over rows of A in those columns */
151       r1 = r2 = r3 = r4 = 0.0;
152       n                 = ai[i + 1] - ai[i];
153       aj                = a->j + ai[i];
154       aa                = a->a + ai[i];
155       for (j = 0; j < n; j++) {
156         r1 += (*aa) * b1[*aj];
157         r2 += (*aa) * b2[*aj];
158         r3 += (*aa) * b3[*aj];
159         r4 += (*aa++) * b4[*aj++];
160       }
161       c[i]       = r1;
162       c[am + i]  = r2;
163       c[am2 + i] = r3;
164       c[am3 + i] = r4;
165     }
166     b1 += bm4;
167     b2 += bm4;
168     b3 += bm4;
169     b4 += bm4;
170 
171     /* RAB[:,col] = R*C[:,col] */
172     colrm = col * rm;
173     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
174       r1 = r2 = r3 = r4 = 0.0;
175       n                 = r->i[i + 1] - r->i[i];
176       rj                = r->j + r->i[i];
177       ra                = r->a + r->i[i];
178       for (j = 0; j < n; j++) {
179         r1 += (*ra) * c[*rj];
180         r2 += (*ra) * c2[*rj];
181         r3 += (*ra) * c3[*rj];
182         r4 += (*ra++) * c4[*rj++];
183       }
184       d[colrm + i]       = r1;
185       d[colrm + rm + i]  = r2;
186       d[colrm + rm2 + i] = r3;
187       d[colrm + rm3 + i] = r4;
188     }
189   }
190   for (; col < cn; col++) {    /* over extra columns of C */
191     for (i = 0; i < am; i++) { /* over rows of A in those columns */
192       r1 = 0.0;
193       n  = a->i[i + 1] - a->i[i];
194       aj = a->j + a->i[i];
195       aa = a->a + a->i[i];
196       for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
197       c[i] = r1;
198     }
199     b1 += bm;
200 
201     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
202       r1 = 0.0;
203       n  = r->i[i + 1] - r->i[i];
204       rj = r->j + r->i[i];
205       ra = r->a + r->i[i];
206       for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
207       d[col * rm + i] = r1;
208     }
209   }
210   PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
211 
212   PetscCall(MatDenseRestoreArrayRead(B, &b));
213   PetscCall(MatDenseRestoreArray(RAB, &d));
214   PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
215   PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
216   PetscFunctionReturn(PETSC_SUCCESS);
217 }
218 
MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C)219 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
220 {
221   MatProductCtx_RARt  *rart;
222   MatTransposeColoring matcoloring;
223   Mat                  Rt, RARt;
224 
225   PetscFunctionBegin;
226   MatCheckProduct(C, 3);
227   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
228   rart = (MatProductCtx_RARt *)C->product->data;
229 
230   /* Get dense Rt by Apply MatTransposeColoring to R */
231   matcoloring = rart->matcoloring;
232   Rt          = rart->Rt;
233   PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
234 
235   /* Get dense RARt = R*A*Rt -- dominates! */
236   RARt = rart->RARt;
237   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
238 
239   /* Recover C from C_dense */
240   PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
241   PetscFunctionReturn(PETSC_SUCCESS);
242 }
243 
MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C)244 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
245 {
246   Mat                 ARt;
247   MatProductCtx_RARt *rart;
248   char               *alg;
249 
250   PetscFunctionBegin;
251   MatCheckProduct(C, 4);
252   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
253   /* create symbolic ARt = A*R^T  */
254   PetscCall(MatProductCreate(A, R, NULL, &ARt));
255   PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
256   PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
257   PetscCall(MatProductSetFill(ARt, fill));
258   PetscCall(MatProductSetFromOptions(ARt));
259   PetscCall(MatProductSymbolic(ARt));
260 
261   /* compute symbolic C = R*ARt */
262   /* set algorithm for C = R*ARt */
263   PetscCall(PetscStrallocpy(C->product->alg, &alg));
264   PetscCall(MatProductSetAlgorithm(C, "sorted"));
265   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
266   /* resume original algorithm for C */
267   PetscCall(MatProductSetAlgorithm(C, alg));
268   PetscCall(PetscFree(alg));
269 
270   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
271 
272   PetscCall(PetscNew(&rart));
273   rart->ARt           = ARt;
274   C->product->data    = rart;
275   C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
276   PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C)280 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
281 {
282   MatProductCtx_RARt *rart;
283 
284   PetscFunctionBegin;
285   MatCheckProduct(C, 3);
286   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
287   rart = (MatProductCtx_RARt *)C->product->data;
288   PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
289   PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
290   PetscFunctionReturn(PETSC_SUCCESS);
291 }
292 
MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C)293 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
294 {
295   Mat                 Rt;
296   MatProductCtx_RARt *rart;
297 
298   PetscFunctionBegin;
299   MatCheckProduct(C, 4);
300   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
301   PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
302   PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
303 
304   PetscCall(PetscNew(&rart));
305   rart->data          = C->product->data;
306   rart->destroy       = C->product->destroy;
307   rart->Rt            = Rt;
308   C->product->data    = rart;
309   C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
310   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
311   PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
312   PetscFunctionReturn(PETSC_SUCCESS);
313 }
314 
MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)315 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
316 {
317   MatProductCtx_RARt *rart;
318 
319   PetscFunctionBegin;
320   MatCheckProduct(C, 3);
321   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
322   rart = (MatProductCtx_RARt *)C->product->data;
323   PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
324   /* MatMatMatMultSymbolic used a different data */
325   C->product->data = rart->data;
326   PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
327   C->product->data = rart;
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat * C)331 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
332 {
333   const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
334   PetscInt    alg         = 0; /* set default algorithm */
335 
336   PetscFunctionBegin;
337   if (scall == MAT_INITIAL_MATRIX) {
338     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
339     PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
340     PetscOptionsEnd();
341 
342     PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
343     PetscCall(MatCreate(PETSC_COMM_SELF, C));
344     switch (alg) {
345     case 1:
346       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
347       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
348       break;
349     case 2:
350       /* via coloring_rart: apply coloring C = R*A*R^T                          */
351       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
352       break;
353     default:
354       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
355       PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
356       break;
357     }
358     PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
359   }
360 
361   PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
362   PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
363   PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
364   PetscFunctionReturn(PETSC_SUCCESS);
365 }
366 
MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)367 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
368 {
369   Mat_Product        *product = C->product;
370   Mat                 A = product->A, R = product->B;
371   MatProductAlgorithm alg  = product->alg;
372   PetscReal           fill = product->fill;
373   PetscBool           flg;
374 
375   PetscFunctionBegin;
376   PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
377   if (flg) {
378     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
379     goto next;
380   }
381 
382   PetscCall(PetscStrcmp(alg, "r*art", &flg));
383   if (flg) {
384     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
385     goto next;
386   }
387 
388   PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
389   if (flg) {
390     PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
391     goto next;
392   }
393 
394   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
395 
396 next:
397   C->ops->productnumeric = MatProductNumeric_RARt;
398   PetscFunctionReturn(PETSC_SUCCESS);
399 }
400