13b1b9624SHong Zhang /*
23b1b9624SHong Zhang Defines projective product routines where A is a SeqAIJ matrix
33b1b9624SHong Zhang C = R * A * R^T
43b1b9624SHong Zhang */
53b1b9624SHong Zhang
6807171c5SHong Zhang #include <../src/mat/impls/aij/seq/aij.h>
7807171c5SHong Zhang #include <../src/mat/utils/freespace.h>
8807171c5SHong Zhang #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
9807171c5SHong Zhang
MatProductCtxDestroy_SeqAIJ_RARt(PetscCtxRt data)10*2a8381b2SBarry Smith static PetscErrorCode MatProductCtxDestroy_SeqAIJ_RARt(PetscCtxRt data)
11d71ae5a4SJacob Faibussowitsch {
12cc1eb50dSBarry Smith MatProductCtx_RARt *rart = *(MatProductCtx_RARt **)data;
13807171c5SHong Zhang
14807171c5SHong Zhang PetscFunctionBegin;
159566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringDestroy(&rart->matcoloring));
169566063dSJacob Faibussowitsch PetscCall(MatDestroy(&rart->Rt));
179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&rart->RARt));
189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&rart->ARt));
199566063dSJacob Faibussowitsch PetscCall(PetscFree(rart->work));
20cc1eb50dSBarry Smith if (rart->destroy) PetscCall((*rart->destroy)(&rart->data));
219566063dSJacob Faibussowitsch PetscCall(PetscFree(rart));
223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
23807171c5SHong Zhang }
24807171c5SHong Zhang
MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C)25d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
26d71ae5a4SJacob Faibussowitsch {
27807171c5SHong Zhang Mat P;
28cc1eb50dSBarry Smith MatProductCtx_RARt *rart;
29335efc43SPeter Brune MatColoring coloring;
30807171c5SHong Zhang MatTransposeColoring matcoloring;
31807171c5SHong Zhang ISColoring iscoloring;
328d0a38e4SHong Zhang Mat Rt_dense, RARt_dense;
33807171c5SHong Zhang
34807171c5SHong Zhang PetscFunctionBegin;
356718818eSStefano Zampini MatCheckProduct(C, 4);
3628b400f6SJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
37807171c5SHong Zhang /* create symbolic P=Rt */
387fb60732SBarry Smith PetscCall(MatTransposeSymbolic(R, &P));
39807171c5SHong Zhang
40807171c5SHong Zhang /* get symbolic C=Pt*A*P */
419566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C));
4258b7e2c1SStefano Zampini PetscCall(MatSetBlockSizes(C, R->rmap->bs, R->rmap->bs));
434222ddf1SHong Zhang C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
44807171c5SHong Zhang
45807171c5SHong Zhang /* create a supporting struct */
469566063dSJacob Faibussowitsch PetscCall(PetscNew(&rart));
476718818eSStefano Zampini C->product->data = rart;
48cc1eb50dSBarry Smith C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
49807171c5SHong Zhang
502ef1f0ffSBarry Smith /* Use coloring */
514222ddf1SHong Zhang /* inode causes memory problem */
529566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
534d478ae7SHong Zhang
54807171c5SHong Zhang /* Create MatTransposeColoring from symbolic C=R*A*R^T */
559566063dSJacob Faibussowitsch PetscCall(MatColoringCreate(C, &coloring));
569566063dSJacob Faibussowitsch PetscCall(MatColoringSetDistance(coloring, 2));
579566063dSJacob Faibussowitsch PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
589566063dSJacob Faibussowitsch PetscCall(MatColoringSetFromOptions(coloring));
599566063dSJacob Faibussowitsch PetscCall(MatColoringApply(coloring, &iscoloring));
609566063dSJacob Faibussowitsch PetscCall(MatColoringDestroy(&coloring));
619566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
622205254eSKarl Rupp
63807171c5SHong Zhang rart->matcoloring = matcoloring;
649566063dSJacob Faibussowitsch PetscCall(ISColoringDestroy(&iscoloring));
653b1b9624SHong Zhang
66807171c5SHong Zhang /* Create Rt_dense */
679566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense));
689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
699566063dSJacob Faibussowitsch PetscCall(MatSetType(Rt_dense, MATSEQDENSE));
709566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL));
712205254eSKarl Rupp
72807171c5SHong Zhang Rt_dense->assembled = PETSC_TRUE;
73807171c5SHong Zhang rart->Rt = Rt_dense;
74807171c5SHong Zhang
75807171c5SHong Zhang /* Create RARt_dense = R*A*Rt_dense */
769566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense));
779566063dSJacob Faibussowitsch PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors));
789566063dSJacob Faibussowitsch PetscCall(MatSetType(RARt_dense, MATSEQDENSE));
799566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL));
802205254eSKarl Rupp
81807171c5SHong Zhang rart->RARt = RARt_dense;
82807171c5SHong Zhang
83807171c5SHong Zhang /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work));
85807171c5SHong Zhang
86807171c5SHong Zhang /* clean up */
879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&P));
88807171c5SHong Zhang
89807171c5SHong Zhang #if defined(PETSC_USE_INFO)
901ce71dffSSatish Balay {
916718818eSStefano Zampini Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
92f4f49eeaSPierre Jolivet PetscReal density = (PetscReal)c->nz / (RARt_dense->rmap->n * RARt_dense->cmap->n);
939566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n"));
9463a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density));
951ce71dffSSatish Balay }
96807171c5SHong Zhang #endif
973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
98807171c5SHong Zhang }
99807171c5SHong Zhang
100807171c5SHong Zhang /*
101807171c5SHong Zhang RAB = R * A * B, R and A in seqaij format, B in dense format;
102807171c5SHong Zhang */
MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar * work)10366976f2fSJacob Faibussowitsch static PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
104d71ae5a4SJacob Faibussowitsch {
105807171c5SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
1061683a169SBarry Smith PetscScalar r1, r2, r3, r4;
1071683a169SBarry Smith const PetscScalar *b, *b1, *b2, *b3, *b4;
108807171c5SHong Zhang MatScalar *aa, *ra;
109807171c5SHong Zhang PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
110807171c5SHong Zhang PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
111807171c5SHong Zhang PetscScalar *d, *c, *c2, *c3, *c4;
112807171c5SHong Zhang PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
113807171c5SHong Zhang PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm;
114807171c5SHong Zhang
115807171c5SHong Zhang PetscFunctionBegin;
1163ba16761SJacob Faibussowitsch if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS);
11708401ef6SPierre Jolivet PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm);
11808401ef6SPierre Jolivet PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am);
11908401ef6SPierre Jolivet PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n);
12008401ef6SPierre Jolivet PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n);
121807171c5SHong Zhang
122274010c0SHong Zhang { /*
123274010c0SHong Zhang This approach is not as good as original ones (will be removed later), but it reveals that
124c4762a1bSJed Brown AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
125274010c0SHong Zhang */
126274010c0SHong Zhang PetscBool via_matmatmult = PETSC_FALSE;
1279566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL));
128274010c0SHong Zhang if (via_matmatmult) {
1294222ddf1SHong Zhang Mat AB_den = NULL;
1309566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den));
1319566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den));
1329566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den));
1339566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB));
1349566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AB_den));
1353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
136274010c0SHong Zhang }
137274010c0SHong Zhang }
138274010c0SHong Zhang
1399566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b));
1409566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(RAB, &d));
1419371c9d4SSatish Balay b1 = b;
1429371c9d4SSatish Balay b2 = b1 + bm;
1439371c9d4SSatish Balay b3 = b2 + bm;
1449371c9d4SSatish Balay b4 = b3 + bm;
1459371c9d4SSatish Balay c = work;
1469371c9d4SSatish Balay c2 = c + am;
1479371c9d4SSatish Balay c3 = c2 + am;
1489371c9d4SSatish Balay c4 = c3 + am;
149807171c5SHong Zhang for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
150807171c5SHong Zhang for (i = 0; i < am; i++) { /* over rows of A in those columns */
151807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0;
152807171c5SHong Zhang n = ai[i + 1] - ai[i];
153807171c5SHong Zhang aj = a->j + ai[i];
154807171c5SHong Zhang aa = a->a + ai[i];
155807171c5SHong Zhang for (j = 0; j < n; j++) {
156807171c5SHong Zhang r1 += (*aa) * b1[*aj];
157807171c5SHong Zhang r2 += (*aa) * b2[*aj];
158807171c5SHong Zhang r3 += (*aa) * b3[*aj];
159807171c5SHong Zhang r4 += (*aa++) * b4[*aj++];
160807171c5SHong Zhang }
161807171c5SHong Zhang c[i] = r1;
162807171c5SHong Zhang c[am + i] = r2;
163807171c5SHong Zhang c[am2 + i] = r3;
164807171c5SHong Zhang c[am3 + i] = r4;
165807171c5SHong Zhang }
166807171c5SHong Zhang b1 += bm4;
167807171c5SHong Zhang b2 += bm4;
168807171c5SHong Zhang b3 += bm4;
169807171c5SHong Zhang b4 += bm4;
170807171c5SHong Zhang
171807171c5SHong Zhang /* RAB[:,col] = R*C[:,col] */
172807171c5SHong Zhang colrm = col * rm;
173807171c5SHong Zhang for (i = 0; i < rm; i++) { /* over rows of R in those columns */
174807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0;
175807171c5SHong Zhang n = r->i[i + 1] - r->i[i];
176807171c5SHong Zhang rj = r->j + r->i[i];
177807171c5SHong Zhang ra = r->a + r->i[i];
178807171c5SHong Zhang for (j = 0; j < n; j++) {
179807171c5SHong Zhang r1 += (*ra) * c[*rj];
180807171c5SHong Zhang r2 += (*ra) * c2[*rj];
181807171c5SHong Zhang r3 += (*ra) * c3[*rj];
182807171c5SHong Zhang r4 += (*ra++) * c4[*rj++];
183807171c5SHong Zhang }
184807171c5SHong Zhang d[colrm + i] = r1;
185807171c5SHong Zhang d[colrm + rm + i] = r2;
186807171c5SHong Zhang d[colrm + rm2 + i] = r3;
187807171c5SHong Zhang d[colrm + rm3 + i] = r4;
188807171c5SHong Zhang }
189807171c5SHong Zhang }
190807171c5SHong Zhang for (; col < cn; col++) { /* over extra columns of C */
191807171c5SHong Zhang for (i = 0; i < am; i++) { /* over rows of A in those columns */
192807171c5SHong Zhang r1 = 0.0;
193807171c5SHong Zhang n = a->i[i + 1] - a->i[i];
194807171c5SHong Zhang aj = a->j + a->i[i];
195807171c5SHong Zhang aa = a->a + a->i[i];
196ad540459SPierre Jolivet for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
197807171c5SHong Zhang c[i] = r1;
198807171c5SHong Zhang }
199807171c5SHong Zhang b1 += bm;
200807171c5SHong Zhang
201807171c5SHong Zhang for (i = 0; i < rm; i++) { /* over rows of R in those columns */
202807171c5SHong Zhang r1 = 0.0;
203807171c5SHong Zhang n = r->i[i + 1] - r->i[i];
204807171c5SHong Zhang rj = r->j + r->i[i];
205807171c5SHong Zhang ra = r->a + r->i[i];
206ad540459SPierre Jolivet for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
207807171c5SHong Zhang d[col * rm + i] = r1;
208807171c5SHong Zhang }
209807171c5SHong Zhang }
2109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz)));
211807171c5SHong Zhang
2129566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b));
2139566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(RAB, &d));
2149566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY));
2159566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY));
2163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
217807171c5SHong Zhang }
218807171c5SHong Zhang
MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C)219d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
220d71ae5a4SJacob Faibussowitsch {
221cc1eb50dSBarry Smith MatProductCtx_RARt *rart;
222807171c5SHong Zhang MatTransposeColoring matcoloring;
2238d0a38e4SHong Zhang Mat Rt, RARt;
224807171c5SHong Zhang
225807171c5SHong Zhang PetscFunctionBegin;
2266718818eSStefano Zampini MatCheckProduct(C, 3);
22728b400f6SJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
228cc1eb50dSBarry Smith rart = (MatProductCtx_RARt *)C->product->data;
2296718818eSStefano Zampini
230807171c5SHong Zhang /* Get dense Rt by Apply MatTransposeColoring to R */
231807171c5SHong Zhang matcoloring = rart->matcoloring;
232807171c5SHong Zhang Rt = rart->Rt;
2339566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt));
234807171c5SHong Zhang
23550647e95SHong Zhang /* Get dense RARt = R*A*Rt -- dominates! */
236807171c5SHong Zhang RARt = rart->RARt;
2379566063dSJacob Faibussowitsch PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work));
238807171c5SHong Zhang
239807171c5SHong Zhang /* Recover C from C_dense */
2409566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C));
2413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
242807171c5SHong Zhang }
243807171c5SHong Zhang
MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C)244d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
245d71ae5a4SJacob Faibussowitsch {
2464222ddf1SHong Zhang Mat ARt;
247cc1eb50dSBarry Smith MatProductCtx_RARt *rart;
2486718818eSStefano Zampini char *alg;
2494fa85fdeSHong Zhang
25095a72cc5SHong Zhang PetscFunctionBegin;
2516718818eSStefano Zampini MatCheckProduct(C, 4);
25228b400f6SJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2534222ddf1SHong Zhang /* create symbolic ARt = A*R^T */
2549566063dSJacob Faibussowitsch PetscCall(MatProductCreate(A, R, NULL, &ARt));
2559566063dSJacob Faibussowitsch PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt));
2569566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(ARt, "sorted"));
2579566063dSJacob Faibussowitsch PetscCall(MatProductSetFill(ARt, fill));
2589566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(ARt));
2599566063dSJacob Faibussowitsch PetscCall(MatProductSymbolic(ARt));
2604222ddf1SHong Zhang
2614222ddf1SHong Zhang /* compute symbolic C = R*ARt */
2627a3c3d58SStefano Zampini /* set algorithm for C = R*ARt */
2639566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(C->product->alg, &alg));
2649566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, "sorted"));
2659566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C));
2667a3c3d58SStefano Zampini /* resume original algorithm for C */
2679566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, alg));
2689566063dSJacob Faibussowitsch PetscCall(PetscFree(alg));
2694222ddf1SHong Zhang
2704222ddf1SHong Zhang C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
27155bea0ebSHong Zhang
2729566063dSJacob Faibussowitsch PetscCall(PetscNew(&rart));
2733b1b9624SHong Zhang rart->ARt = ARt;
2746718818eSStefano Zampini C->product->data = rart;
275cc1eb50dSBarry Smith C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
2769566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n"));
2773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
27836104f73SHong Zhang }
27955bea0ebSHong Zhang
MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C)280d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
281d71ae5a4SJacob Faibussowitsch {
282cc1eb50dSBarry Smith MatProductCtx_RARt *rart;
28355bea0ebSHong Zhang
28455bea0ebSHong Zhang PetscFunctionBegin;
2856718818eSStefano Zampini MatCheckProduct(C, 3);
28628b400f6SJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
287cc1eb50dSBarry Smith rart = (MatProductCtx_RARt *)C->product->data;
2889566063dSJacob Faibussowitsch PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */
2899566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C));
2903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
291807171c5SHong Zhang }
29255bea0ebSHong Zhang
MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C)293d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
294d71ae5a4SJacob Faibussowitsch {
29595a72cc5SHong Zhang Mat Rt;
296cc1eb50dSBarry Smith MatProductCtx_RARt *rart;
29755bea0ebSHong Zhang
29855bea0ebSHong Zhang PetscFunctionBegin;
2996718818eSStefano Zampini MatCheckProduct(C, 4);
30028b400f6SJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
301acd337a6SBarry Smith PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt));
3029566063dSJacob Faibussowitsch PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C));
30395a72cc5SHong Zhang
3049566063dSJacob Faibussowitsch PetscCall(PetscNew(&rart));
3056718818eSStefano Zampini rart->data = C->product->data;
3066718818eSStefano Zampini rart->destroy = C->product->destroy;
30795a72cc5SHong Zhang rart->Rt = Rt;
3086718818eSStefano Zampini C->product->data = rart;
309cc1eb50dSBarry Smith C->product->destroy = MatProductCtxDestroy_SeqAIJ_RARt;
3104222ddf1SHong Zhang C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
3119566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n"));
3123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
31355bea0ebSHong Zhang }
31455bea0ebSHong Zhang
MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C)315d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
316d71ae5a4SJacob Faibussowitsch {
317cc1eb50dSBarry Smith MatProductCtx_RARt *rart;
31855bea0ebSHong Zhang
31955bea0ebSHong Zhang PetscFunctionBegin;
3206718818eSStefano Zampini MatCheckProduct(C, 3);
32128b400f6SJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
322cc1eb50dSBarry Smith rart = (MatProductCtx_RARt *)C->product->data;
323acd337a6SBarry Smith PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt));
3246718818eSStefano Zampini /* MatMatMatMultSymbolic used a different data */
3256718818eSStefano Zampini C->product->data = rart->data;
3269566063dSJacob Faibussowitsch PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C));
3276718818eSStefano Zampini C->product->data = rart;
3283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
32955bea0ebSHong Zhang }
33055bea0ebSHong Zhang
MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat * C)331d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
332d71ae5a4SJacob Faibussowitsch {
33355bea0ebSHong Zhang const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
33455bea0ebSHong Zhang PetscInt alg = 0; /* set default algorithm */
33555bea0ebSHong Zhang
33655bea0ebSHong Zhang PetscFunctionBegin;
33755bea0ebSHong Zhang if (scall == MAT_INITIAL_MATRIX) {
338d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
3399566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL));
340d0609cedSBarry Smith PetscOptionsEnd();
341b56132cbSHong Zhang
3429566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0));
3439566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, C));
34455bea0ebSHong Zhang switch (alg) {
34555bea0ebSHong Zhang case 1:
34655bea0ebSHong Zhang /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
3479566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C));
34855bea0ebSHong Zhang break;
34955bea0ebSHong Zhang case 2:
35055bea0ebSHong Zhang /* via coloring_rart: apply coloring C = R*A*R^T */
3519566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C));
35255bea0ebSHong Zhang break;
35355bea0ebSHong Zhang default:
35455bea0ebSHong Zhang /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
3559566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C));
35655bea0ebSHong Zhang break;
35755bea0ebSHong Zhang }
3589566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0));
3595008f5a7SHong Zhang }
3605008f5a7SHong Zhang
3619566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0));
3629566063dSJacob Faibussowitsch PetscCall(((*C)->ops->rartnumeric)(A, R, *C));
3639566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0));
3643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
365807171c5SHong Zhang }
3664222ddf1SHong Zhang
MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)367d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
368d71ae5a4SJacob Faibussowitsch {
3694222ddf1SHong Zhang Mat_Product *product = C->product;
3704222ddf1SHong Zhang Mat A = product->A, R = product->B;
3714222ddf1SHong Zhang MatProductAlgorithm alg = product->alg;
3724222ddf1SHong Zhang PetscReal fill = product->fill;
3734222ddf1SHong Zhang PetscBool flg;
3744222ddf1SHong Zhang
3754222ddf1SHong Zhang PetscFunctionBegin;
3769566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "r*a*rt", &flg));
3774222ddf1SHong Zhang if (flg) {
3789566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C));
3794222ddf1SHong Zhang goto next;
3804222ddf1SHong Zhang }
3814222ddf1SHong Zhang
3829566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "r*art", &flg));
3834222ddf1SHong Zhang if (flg) {
3849566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C));
3854222ddf1SHong Zhang goto next;
3864222ddf1SHong Zhang }
3874222ddf1SHong Zhang
3889566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "coloring_rart", &flg));
3894222ddf1SHong Zhang if (flg) {
3909566063dSJacob Faibussowitsch PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C));
3914222ddf1SHong Zhang goto next;
3924222ddf1SHong Zhang }
3934222ddf1SHong Zhang
3944222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
3954222ddf1SHong Zhang
3964222ddf1SHong Zhang next:
3974222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_RARt;
3983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3994222ddf1SHong Zhang }
400