1 /*
2 Defines projective product routines where A is a SeqAIJ matrix
3 C = R * A * R^T
4 */
5
6 #include <../src/mat/impls/aij/seq/aij.h>
7 #include <../src/mat/utils/freespace.h>
8 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
9
MatProductCtxDestroy_SeqAIJ_RARt(PetscCtxRt data)10 static PetscErrorCode MatProductCtxDestroy_SeqAIJ_RARt(PetscCtxRt data)
11 {
12 MatProductCtx_RARt *rart = *(MatProductCtx_RARt **)data;
13
14 PetscFunctionBegin;
15 PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
16 PetscCall(MatDestroy(&rart->Rt));
17 PetscCall(MatDestroy(&rart->RARt));
18 PetscCall(MatDestroy(&rart->ARt));
19 PetscCall(PetscFree(rart->work));
20 if (rart->destroy) PetscCall((*rart->destroy)(&rart->data));
21 PetscCall(PetscFree(rart));
22 PetscFunctionReturn(PETSC_SUCCESS);
23 }
24
MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C)25 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
26 {
27 Mat P;
28 MatProductCtx_RARt *rart;
29 MatColoring coloring;
30 MatTransposeColoring matcoloring;
31 ISColoring iscoloring;
32 Mat Rt_dense, RARt_dense;
33
34 PetscFunctionBegin;
35 MatCheckProduct(C, 4);
36 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
37 /* create symbolic P=Rt */
38 PetscCall(MatTransposeSymbolic(R, &P));
39
40 /* get symbolic C=Pt*A*P */
41 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
42 PetscCall(MatSetBlockSizes(C, R->rmap->bs, R->rmap->bs));
43 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
44
45 /* create a supporting struct */
46 PetscCall(PetscNew(&rart));
47 C->product->data = rart;
48 C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
49
50 /* Use coloring */
51 /* inode causes memory problem */
52 PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
53
54 /* Create MatTransposeColoring from symbolic C=R*A*R^T */
55 PetscCall(MatColoringCreate(C, &coloring));
56 PetscCall(MatColoringSetDistance(coloring, 2));
57 PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
58 PetscCall(MatColoringSetFromOptions(coloring));
59 PetscCall(MatColoringApply(coloring, &iscoloring));
60 PetscCall(MatColoringDestroy(&coloring));
61 PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
62
63 rart->matcoloring = matcoloring;
64 PetscCall(ISColoringDestroy(&iscoloring));
65
66 /* Create Rt_dense */
67 PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
68 PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
69 PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
70 PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
71
72 Rt_dense->assembled = PETSC_TRUE;
73 rart->Rt = Rt_dense;
74
75 /* Create RARt_dense = R*A*Rt_dense */
76 PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
77 PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
78 PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
79 PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
80
81 rart->RARt = RARt_dense;
82
83 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
84 PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
85
86 /* clean up */
87 PetscCall(MatDestroy(&P));
88
89 #if defined(PETSC_USE_INFO)
90 {
91 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
92 PetscReal density = (PetscReal)c->nz / (RARt_dense->rmap->n * RARt_dense->cmap->n);
93 PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
94 PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
95 }
96 #endif
97 PetscFunctionReturn(PETSC_SUCCESS);
98 }
99
100 /*
101 RAB = R * A * B, R and A in seqaij format, B in dense format;
102 */
MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar * work)103 static PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
104 {
105 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
106 PetscScalar r1, r2, r3, r4;
107 const PetscScalar *b, *b1, *b2, *b3, *b4;
108 MatScalar *aa, *ra;
109 PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
110 PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
111 PetscScalar *d, *c, *c2, *c3, *c4;
112 PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
113 PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm;
114
115 PetscFunctionBegin;
116 if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS);
117 PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
118 PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
119 PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
120 PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
121
122 { /*
123 This approach is not as good as original ones (will be removed later), but it reveals that
124 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
125 */
126 PetscBool via_matmatmult = PETSC_FALSE;
127 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
128 if (via_matmatmult) {
129 Mat AB_den = NULL;
130 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
131 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
132 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
133 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
134 PetscCall(MatDestroy(&AB_den));
135 PetscFunctionReturn(PETSC_SUCCESS);
136 }
137 }
138
139 PetscCall(MatDenseGetArrayRead(B, &b));
140 PetscCall(MatDenseGetArray(RAB, &d));
141 b1 = b;
142 b2 = b1 + bm;
143 b3 = b2 + bm;
144 b4 = b3 + bm;
145 c = work;
146 c2 = c + am;
147 c3 = c2 + am;
148 c4 = c3 + am;
149 for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
150 for (i = 0; i < am; i++) { /* over rows of A in those columns */
151 r1 = r2 = r3 = r4 = 0.0;
152 n = ai[i + 1] - ai[i];
153 aj = a->j + ai[i];
154 aa = a->a + ai[i];
155 for (j = 0; j < n; j++) {
156 r1 += (*aa) * b1[*aj];
157 r2 += (*aa) * b2[*aj];
158 r3 += (*aa) * b3[*aj];
159 r4 += (*aa++) * b4[*aj++];
160 }
161 c[i] = r1;
162 c[am + i] = r2;
163 c[am2 + i] = r3;
164 c[am3 + i] = r4;
165 }
166 b1 += bm4;
167 b2 += bm4;
168 b3 += bm4;
169 b4 += bm4;
170
171 /* RAB[:,col] = R*C[:,col] */
172 colrm = col * rm;
173 for (i = 0; i < rm; i++) { /* over rows of R in those columns */
174 r1 = r2 = r3 = r4 = 0.0;
175 n = r->i[i + 1] - r->i[i];
176 rj = r->j + r->i[i];
177 ra = r->a + r->i[i];
178 for (j = 0; j < n; j++) {
179 r1 += (*ra) * c[*rj];
180 r2 += (*ra) * c2[*rj];
181 r3 += (*ra) * c3[*rj];
182 r4 += (*ra++) * c4[*rj++];
183 }
184 d[colrm + i] = r1;
185 d[colrm + rm + i] = r2;
186 d[colrm + rm2 + i] = r3;
187 d[colrm + rm3 + i] = r4;
188 }
189 }
190 for (; col < cn; col++) { /* over extra columns of C */
191 for (i = 0; i < am; i++) { /* over rows of A in those columns */
192 r1 = 0.0;
193 n = a->i[i + 1] - a->i[i];
194 aj = a->j + a->i[i];
195 aa = a->a + a->i[i];
196 for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
197 c[i] = r1;
198 }
199 b1 += bm;
200
201 for (i = 0; i < rm; i++) { /* over rows of R in those columns */
202 r1 = 0.0;
203 n = r->i[i + 1] - r->i[i];
204 rj = r->j + r->i[i];
205 ra = r->a + r->i[i];
206 for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
207 d[col * rm + i] = r1;
208 }
209 }
210 PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
211
212 PetscCall(MatDenseRestoreArrayRead(B, &b));
213 PetscCall(MatDenseRestoreArray(RAB, &d));
214 PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
215 PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
216 PetscFunctionReturn(PETSC_SUCCESS);
217 }
218
MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C)219 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
220 {
221 MatProductCtx_RARt *rart;
222 MatTransposeColoring matcoloring;
223 Mat Rt, RARt;
224
225 PetscFunctionBegin;
226 MatCheckProduct(C, 3);
227 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
228 rart = (MatProductCtx_RARt *)C->product->data;
229
230 /* Get dense Rt by Apply MatTransposeColoring to R */
231 matcoloring = rart->matcoloring;
232 Rt = rart->Rt;
233 PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
234
235 /* Get dense RARt = R*A*Rt -- dominates! */
236 RARt = rart->RARt;
237 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
238
239 /* Recover C from C_dense */
240 PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
241 PetscFunctionReturn(PETSC_SUCCESS);
242 }
243
MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C)244 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
245 {
246 Mat ARt;
247 MatProductCtx_RARt *rart;
248 char *alg;
249
250 PetscFunctionBegin;
251 MatCheckProduct(C, 4);
252 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
253 /* create symbolic ARt = A*R^T */
254 PetscCall(MatProductCreate(A, R, NULL, &ARt));
255 PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
256 PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
257 PetscCall(MatProductSetFill(ARt, fill));
258 PetscCall(MatProductSetFromOptions(ARt));
259 PetscCall(MatProductSymbolic(ARt));
260
261 /* compute symbolic C = R*ARt */
262 /* set algorithm for C = R*ARt */
263 PetscCall(PetscStrallocpy(C->product->alg, &alg));
264 PetscCall(MatProductSetAlgorithm(C, "sorted"));
265 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
266 /* resume original algorithm for C */
267 PetscCall(MatProductSetAlgorithm(C, alg));
268 PetscCall(PetscFree(alg));
269
270 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
271
272 PetscCall(PetscNew(&rart));
273 rart->ARt = ARt;
274 C->product->data = rart;
275 C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
276 PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
277 PetscFunctionReturn(PETSC_SUCCESS);
278 }
279
MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C)280 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
281 {
282 MatProductCtx_RARt *rart;
283
284 PetscFunctionBegin;
285 MatCheckProduct(C, 3);
286 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
287 rart = (MatProductCtx_RARt *)C->product->data;
288 PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
289 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
290 PetscFunctionReturn(PETSC_SUCCESS);
291 }
292
MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C)293 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
294 {
295 Mat Rt;
296 MatProductCtx_RARt *rart;
297
298 PetscFunctionBegin;
299 MatCheckProduct(C, 4);
300 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
301 PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
302 PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
303
304 PetscCall(PetscNew(&rart));
305 rart->data = C->product->data;
306 rart->destroy = C->product->destroy;
307 rart->Rt = Rt;
308 C->product->data = rart;
309 C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
310 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
311 PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
312 PetscFunctionReturn(PETSC_SUCCESS);
313 }
314
MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)315 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
316 {
317 MatProductCtx_RARt *rart;
318
319 PetscFunctionBegin;
320 MatCheckProduct(C, 3);
321 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
322 rart = (MatProductCtx_RARt *)C->product->data;
323 PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
324 /* MatMatMatMultSymbolic used a different data */
325 C->product->data = rart->data;
326 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
327 C->product->data = rart;
328 PetscFunctionReturn(PETSC_SUCCESS);
329 }
330
MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat * C)331 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
332 {
333 const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
334 PetscInt alg = 0; /* set default algorithm */
335
336 PetscFunctionBegin;
337 if (scall == MAT_INITIAL_MATRIX) {
338 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
339 PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
340 PetscOptionsEnd();
341
342 PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
343 PetscCall(MatCreate(PETSC_COMM_SELF, C));
344 switch (alg) {
345 case 1:
346 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
347 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
348 break;
349 case 2:
350 /* via coloring_rart: apply coloring C = R*A*R^T */
351 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
352 break;
353 default:
354 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
355 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
356 break;
357 }
358 PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
359 }
360
361 PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
362 PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
363 PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
364 PetscFunctionReturn(PETSC_SUCCESS);
365 }
366
MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)367 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
368 {
369 Mat_Product *product = C->product;
370 Mat A = product->A, R = product->B;
371 MatProductAlgorithm alg = product->alg;
372 PetscReal fill = product->fill;
373 PetscBool flg;
374
375 PetscFunctionBegin;
376 PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
377 if (flg) {
378 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
379 goto next;
380 }
381
382 PetscCall(PetscStrcmp(alg, "r*art", &flg));
383 if (flg) {
384 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
385 goto next;
386 }
387
388 PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
389 if (flg) {
390 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
391 goto next;
392 }
393
394 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
395
396 next:
397 C->ops->productnumeric = MatProductNumeric_RARt;
398 PetscFunctionReturn(PETSC_SUCCESS);
399 }
400