1c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
2c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
382b1193eSBarry Smith
4b350b9acSSatish Balay /*@
511a5261eSBarry Smith MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
6ff585edeSJed Brown
711a5261eSBarry Smith Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
8ff585edeSJed Brown
9ff585edeSJed Brown Input Parameter:
1011a5261eSBarry Smith . A - the `MATMAIJ` matrix
11ff585edeSJed Brown
12ff585edeSJed Brown Output Parameter:
1311a5261eSBarry Smith . B - the `MATAIJ` matrix
14ff585edeSJed Brown
15ff585edeSJed Brown Level: advanced
16ff585edeSJed Brown
1711a5261eSBarry Smith Note:
1811a5261eSBarry Smith The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
19ff585edeSJed Brown
201cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
21ff585edeSJed Brown @*/
MatMAIJGetAIJ(Mat A,Mat * B)22d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
23d71ae5a4SJacob Faibussowitsch {
24ace3abfcSBarry Smith PetscBool ismpimaij, isseqmaij;
25b9b97703SBarry Smith
26b9b97703SBarry Smith PetscFunctionBegin;
279566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29b9b97703SBarry Smith if (ismpimaij) {
30b9b97703SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
31b9b97703SBarry Smith
32b9b97703SBarry Smith *B = b->A;
33b9b97703SBarry Smith } else if (isseqmaij) {
34b9b97703SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
35b9b97703SBarry Smith
36b9b97703SBarry Smith *B = b->AIJ;
37b9b97703SBarry Smith } else {
38b9b97703SBarry Smith *B = A;
39b9b97703SBarry Smith }
403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
41b9b97703SBarry Smith }
42b9b97703SBarry Smith
43480dffcfSJed Brown /*@
4411a5261eSBarry Smith MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
45ff585edeSJed Brown
463f9fe445SBarry Smith Logically Collective
47ff585edeSJed Brown
48d8d19677SJose E. Roman Input Parameters:
4911a5261eSBarry Smith + A - the `MATMAIJ` matrix
50ff585edeSJed Brown - dof - the block size for the new matrix
51ff585edeSJed Brown
52ff585edeSJed Brown Output Parameter:
5311a5261eSBarry Smith . B - the new `MATMAIJ` matrix
54ff585edeSJed Brown
55ff585edeSJed Brown Level: advanced
56ff585edeSJed Brown
571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
58ff585edeSJed Brown @*/
MatMAIJRedimension(Mat A,PetscInt dof,Mat * B)59d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
60d71ae5a4SJacob Faibussowitsch {
610298fd71SBarry Smith Mat Aij = NULL;
62b9b97703SBarry Smith
63b9b97703SBarry Smith PetscFunctionBegin;
64c5eb9154SBarry Smith PetscValidLogicalCollectiveInt(A, dof, 2);
659566063dSJacob Faibussowitsch PetscCall(MatMAIJGetAIJ(A, &Aij));
669566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(Aij, dof, B));
673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
68b9b97703SBarry Smith }
69b9b97703SBarry Smith
MatDestroy_SeqMAIJ(Mat A)7080eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
71d71ae5a4SJacob Faibussowitsch {
724cb9d9b8SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
7382b1193eSBarry Smith
7482b1193eSBarry Smith PetscFunctionBegin;
759566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ));
769566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data));
772e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
814cb9d9b8SBarry Smith }
824cb9d9b8SBarry Smith
MatSetUp_MAIJ(Mat A)8380eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A)
84d71ae5a4SJacob Faibussowitsch {
85356c569eSBarry Smith PetscFunctionBegin;
86ce94432eSBarry Smith SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
87356c569eSBarry Smith }
88356c569eSBarry Smith
MatView_SeqMAIJ(Mat A,PetscViewer viewer)8980eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
90d71ae5a4SJacob Faibussowitsch {
910fd73130SBarry Smith Mat B;
920fd73130SBarry Smith
930fd73130SBarry Smith PetscFunctionBegin;
949566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
959566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer));
969566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B));
973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
980fd73130SBarry Smith }
990fd73130SBarry Smith
MatView_MPIMAIJ(Mat A,PetscViewer viewer)10080eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
101d71ae5a4SJacob Faibussowitsch {
1020fd73130SBarry Smith Mat B;
1030fd73130SBarry Smith
1040fd73130SBarry Smith PetscFunctionBegin;
1059566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
1069566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer));
1079566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B));
1083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1090fd73130SBarry Smith }
1100fd73130SBarry Smith
MatDestroy_MPIMAIJ(Mat A)11180eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
112d71ae5a4SJacob Faibussowitsch {
1134cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
1144cb9d9b8SBarry Smith
1154cb9d9b8SBarry Smith PetscFunctionBegin;
1169566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->AIJ));
1179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->OAIJ));
1189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A));
1199566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->ctx));
1209566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->w));
1219566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data));
1222e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
1239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
1249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
1259566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
127b9b97703SBarry Smith }
128b9b97703SBarry Smith
1290bad9183SKris Buschelman /*MC
130fafad747SKris Buschelman MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1310bad9183SKris Buschelman multicomponent problems, interpolating or restricting each component the same way independently.
13211a5261eSBarry Smith The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
1330bad9183SKris Buschelman
1340bad9183SKris Buschelman Operations provided:
13567b8a455SSatish Balay .vb
13611a5261eSBarry Smith MatMult()
13711a5261eSBarry Smith MatMultTranspose()
13811a5261eSBarry Smith MatMultAdd()
13911a5261eSBarry Smith MatMultTransposeAdd()
14067b8a455SSatish Balay .ve
1410bad9183SKris Buschelman
1420bad9183SKris Buschelman Level: advanced
1430bad9183SKris Buschelman
1441cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
1450bad9183SKris Buschelman M*/
1460bad9183SKris Buschelman
MatCreate_MAIJ(Mat A)147d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
148d71ae5a4SJacob Faibussowitsch {
1494cb9d9b8SBarry Smith Mat_MPIMAIJ *b;
15064b52464SHong Zhang PetscMPIInt size;
15182b1193eSBarry Smith
15282b1193eSBarry Smith PetscFunctionBegin;
1534dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b));
154b0a32e0cSBarry Smith A->data = (void *)b;
15526fbe8dcSKarl Rupp
1569566063dSJacob Faibussowitsch PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
15726fbe8dcSKarl Rupp
158356c569eSBarry Smith A->ops->setup = MatSetUp_MAIJ;
159f4a53059SBarry Smith
160f4259b30SLisandro Dalcin b->AIJ = NULL;
161cd3bbe55SBarry Smith b->dof = 0;
162f4259b30SLisandro Dalcin b->OAIJ = NULL;
163f4259b30SLisandro Dalcin b->ctx = NULL;
164f4259b30SLisandro Dalcin b->w = NULL;
1659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
16664b52464SHong Zhang if (size == 1) {
1679566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
16864b52464SHong Zhang } else {
1699566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
17064b52464SHong Zhang }
17132e7c8b0SBarry Smith A->preallocated = PETSC_TRUE;
17232e7c8b0SBarry Smith A->assembled = PETSC_TRUE;
1733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
17482b1193eSBarry Smith }
17582b1193eSBarry Smith
17680eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline)
17780eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE __attribute__((always_inline))
17880eb40d0SJacob Faibussowitsch #else
17980eb40d0SJacob Faibussowitsch #define PETSC_FORCE_INLINE
18080eb40d0SJacob Faibussowitsch #endif
181bb5b24f5SJacob Faibussowitsch
1829927e305SJacob Faibussowitsch #if defined(__clang__)
183bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
184bb5b24f5SJacob Faibussowitsch #else
185bb5b24f5SJacob Faibussowitsch #define PETSC_PRAGMA_UNROLL
186bb5b24f5SJacob Faibussowitsch #endif
187bb5b24f5SJacob Faibussowitsch
18880eb40d0SJacob Faibussowitsch enum {
18980eb40d0SJacob Faibussowitsch MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
19080eb40d0SJacob Faibussowitsch };
19180eb40d0SJacob Faibussowitsch
19280eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
19380eb40d0SJacob Faibussowitsch // keyword into account for these...
MatMult_MatMultAdd_SeqMAIJ_Template(Mat A,Vec xx,Vec yy,Vec zz,int N)19480eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
19580eb40d0SJacob Faibussowitsch {
19680eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
19780eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
19880eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ;
19980eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data;
20080eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n;
20180eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz;
20280eb40d0SJacob Faibussowitsch const PetscInt *idx = a->j;
20380eb40d0SJacob Faibussowitsch const PetscInt *ii = a->i;
20480eb40d0SJacob Faibussowitsch const PetscScalar *v = a->a;
20580eb40d0SJacob Faibussowitsch PetscInt nonzerorow = 0;
20680eb40d0SJacob Faibussowitsch const PetscScalar *x;
20780eb40d0SJacob Faibussowitsch PetscScalar *z;
20880eb40d0SJacob Faibussowitsch
20980eb40d0SJacob Faibussowitsch PetscFunctionBegin;
21080eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
21180eb40d0SJacob Faibussowitsch if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
21280eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x));
21380eb40d0SJacob Faibussowitsch if (mult_add) {
21480eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z));
21580eb40d0SJacob Faibussowitsch } else {
21680eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayWrite(zz, &z));
21780eb40d0SJacob Faibussowitsch }
21880eb40d0SJacob Faibussowitsch
21980eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; ++i) {
22080eb40d0SJacob Faibussowitsch PetscInt jrow = ii[i];
22180eb40d0SJacob Faibussowitsch const PetscInt n = ii[i + 1] - jrow;
22280eb40d0SJacob Faibussowitsch // leave a line so clang-format does not align these decls
22380eb40d0SJacob Faibussowitsch PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};
22480eb40d0SJacob Faibussowitsch
22580eb40d0SJacob Faibussowitsch nonzerorow += n > 0;
22680eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j, ++jrow) {
22780eb40d0SJacob Faibussowitsch const PetscScalar v_jrow = v[jrow];
22880eb40d0SJacob Faibussowitsch const PetscInt N_idx_jrow = N * idx[jrow];
22980eb40d0SJacob Faibussowitsch
230bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL
23180eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
23280eb40d0SJacob Faibussowitsch }
23380eb40d0SJacob Faibussowitsch
234bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL
23580eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) {
23680eb40d0SJacob Faibussowitsch const PetscInt z_idx = N * i + k;
23780eb40d0SJacob Faibussowitsch
23880eb40d0SJacob Faibussowitsch if (mult_add) {
23980eb40d0SJacob Faibussowitsch z[z_idx] += sum[k];
24080eb40d0SJacob Faibussowitsch } else {
24180eb40d0SJacob Faibussowitsch z[z_idx] = sum[k];
24280eb40d0SJacob Faibussowitsch }
24380eb40d0SJacob Faibussowitsch }
24480eb40d0SJacob Faibussowitsch }
24580eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
24680eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x));
24780eb40d0SJacob Faibussowitsch if (mult_add) {
24880eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z));
24980eb40d0SJacob Faibussowitsch } else {
25080eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(zz, &z));
25180eb40d0SJacob Faibussowitsch }
25280eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
25380eb40d0SJacob Faibussowitsch }
25480eb40d0SJacob Faibussowitsch
MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A,Vec xx,Vec yy,Vec zz,int N)25580eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
25680eb40d0SJacob Faibussowitsch {
25780eb40d0SJacob Faibussowitsch const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
25880eb40d0SJacob Faibussowitsch const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
25980eb40d0SJacob Faibussowitsch const Mat baij = b->AIJ;
26080eb40d0SJacob Faibussowitsch const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data;
26180eb40d0SJacob Faibussowitsch const PetscInt m = baij->rmap->n;
26280eb40d0SJacob Faibussowitsch const PetscInt nz = a->nz;
26380eb40d0SJacob Faibussowitsch const PetscInt *a_j = a->j;
26480eb40d0SJacob Faibussowitsch const PetscInt *a_i = a->i;
26580eb40d0SJacob Faibussowitsch const PetscScalar *a_a = a->a;
26680eb40d0SJacob Faibussowitsch const PetscScalar *x;
26780eb40d0SJacob Faibussowitsch PetscScalar *z;
26880eb40d0SJacob Faibussowitsch
26980eb40d0SJacob Faibussowitsch PetscFunctionBegin;
27080eb40d0SJacob Faibussowitsch PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
27180eb40d0SJacob Faibussowitsch if (mult_add) {
27280eb40d0SJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz));
27380eb40d0SJacob Faibussowitsch } else {
27480eb40d0SJacob Faibussowitsch PetscCall(VecSet(zz, 0.0));
27580eb40d0SJacob Faibussowitsch }
27680eb40d0SJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x));
27780eb40d0SJacob Faibussowitsch PetscCall(VecGetArray(zz, &z));
27880eb40d0SJacob Faibussowitsch
27980eb40d0SJacob Faibussowitsch for (PetscInt i = 0; i < m; i++) {
28080eb40d0SJacob Faibussowitsch const PetscInt a_ii = a_i[i];
2818e3a54c0SPierre Jolivet const PetscInt *idx = PetscSafePointerPlusOffset(a_j, a_ii);
2828e3a54c0SPierre Jolivet const PetscScalar *v = PetscSafePointerPlusOffset(a_a, a_ii);
28380eb40d0SJacob Faibussowitsch const PetscInt n = a_i[i + 1] - a_ii;
28480eb40d0SJacob Faibussowitsch PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];
28580eb40d0SJacob Faibussowitsch
286bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL
28780eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
28880eb40d0SJacob Faibussowitsch for (PetscInt j = 0; j < n; ++j) {
28980eb40d0SJacob Faibussowitsch const PetscInt N_idx_j = N * idx[j];
29080eb40d0SJacob Faibussowitsch const PetscScalar v_j = v[j];
29180eb40d0SJacob Faibussowitsch
292bb5b24f5SJacob Faibussowitsch PETSC_PRAGMA_UNROLL
29380eb40d0SJacob Faibussowitsch for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
29480eb40d0SJacob Faibussowitsch }
29580eb40d0SJacob Faibussowitsch }
29680eb40d0SJacob Faibussowitsch
29780eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2 * N * nz));
29880eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x));
29980eb40d0SJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &z));
30080eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
30180eb40d0SJacob Faibussowitsch }
30280eb40d0SJacob Faibussowitsch
3039927e305SJacob Faibussowitsch #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
3049927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
3059927e305SJacob Faibussowitsch { \
3069927e305SJacob Faibussowitsch PetscFunctionBegin; \
3079927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
3089927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \
3099927e305SJacob Faibussowitsch } \
3109927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
3119927e305SJacob Faibussowitsch { \
3129927e305SJacob Faibussowitsch PetscFunctionBegin; \
3139927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
3149927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \
3159927e305SJacob Faibussowitsch } \
3169927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
3179927e305SJacob Faibussowitsch { \
3189927e305SJacob Faibussowitsch PetscFunctionBegin; \
3199927e305SJacob Faibussowitsch PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
3209927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \
3219927e305SJacob Faibussowitsch } \
3229927e305SJacob Faibussowitsch static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
3239927e305SJacob Faibussowitsch { \
3249927e305SJacob Faibussowitsch PetscFunctionBegin; \
3259927e305SJacob Faibussowitsch PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
3269927e305SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); \
32782b1193eSBarry Smith }
328bcc973b7SBarry Smith
3299927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
3309927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
3319927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
3329927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
3339927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
3349927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
3359927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
3369927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
3379927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
3389927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
3399927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
3409927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
341bcc973b7SBarry Smith
3429927e305SJacob Faibussowitsch #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
343ed1c418cSBarry Smith
MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)34480eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
345d71ae5a4SJacob Faibussowitsch {
3466861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
3476861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
3486861f193SBarry Smith const PetscScalar *x, *v;
3496861f193SBarry Smith PetscScalar *y, *sums;
3506861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
3516861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k;
3526861f193SBarry Smith
3536861f193SBarry Smith PetscFunctionBegin;
3549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x));
3559566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0));
3569566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y));
3576861f193SBarry Smith idx = a->j;
3586861f193SBarry Smith v = a->a;
3596861f193SBarry Smith ii = a->i;
3606861f193SBarry Smith
3616861f193SBarry Smith for (i = 0; i < m; i++) {
3626861f193SBarry Smith jrow = ii[i];
3636861f193SBarry Smith n = ii[i + 1] - jrow;
3646861f193SBarry Smith sums = y + dof * i;
3656861f193SBarry Smith for (j = 0; j < n; j++) {
366ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
3676861f193SBarry Smith jrow++;
3686861f193SBarry Smith }
3696861f193SBarry Smith }
3706861f193SBarry Smith
3719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz));
3729566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x));
3739566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y));
3743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3756861f193SBarry Smith }
3766861f193SBarry Smith
MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)37780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
378d71ae5a4SJacob Faibussowitsch {
3796861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
3806861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
3816861f193SBarry Smith const PetscScalar *x, *v;
3826861f193SBarry Smith PetscScalar *y, *sums;
3836861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
3846861f193SBarry Smith PetscInt n, i, jrow, j, dof = b->dof, k;
3856861f193SBarry Smith
3866861f193SBarry Smith PetscFunctionBegin;
3879566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz));
3889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x));
3899566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y));
3906861f193SBarry Smith idx = a->j;
3916861f193SBarry Smith v = a->a;
3926861f193SBarry Smith ii = a->i;
3936861f193SBarry Smith
3946861f193SBarry Smith for (i = 0; i < m; i++) {
3956861f193SBarry Smith jrow = ii[i];
3966861f193SBarry Smith n = ii[i + 1] - jrow;
3976861f193SBarry Smith sums = y + dof * i;
3986861f193SBarry Smith for (j = 0; j < n; j++) {
399ad540459SPierre Jolivet for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
4006861f193SBarry Smith jrow++;
4016861f193SBarry Smith }
4026861f193SBarry Smith }
4036861f193SBarry Smith
4049566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz));
4059566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x));
4069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y));
4073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4086861f193SBarry Smith }
4096861f193SBarry Smith
MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)41080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
411d71ae5a4SJacob Faibussowitsch {
4126861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
4136861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
4146861f193SBarry Smith const PetscScalar *x, *v, *alpha;
4156861f193SBarry Smith PetscScalar *y;
4166861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
4176861f193SBarry Smith PetscInt n, i, k;
4186861f193SBarry Smith
4196861f193SBarry Smith PetscFunctionBegin;
4209566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x));
4219566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0));
4229566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y));
4236861f193SBarry Smith for (i = 0; i < m; i++) {
4248e3a54c0SPierre Jolivet idx = PetscSafePointerPlusOffset(a->j, a->i[i]);
4258e3a54c0SPierre Jolivet v = PetscSafePointerPlusOffset(a->a, a->i[i]);
4266861f193SBarry Smith n = a->i[i + 1] - a->i[i];
4276861f193SBarry Smith alpha = x + dof * i;
4286861f193SBarry Smith while (n-- > 0) {
429ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
4309371c9d4SSatish Balay idx++;
4319371c9d4SSatish Balay v++;
4326861f193SBarry Smith }
4336861f193SBarry Smith }
4349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz));
4359566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x));
4369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y));
4373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4386861f193SBarry Smith }
4396861f193SBarry Smith
MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)44080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
441d71ae5a4SJacob Faibussowitsch {
4426861f193SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
4436861f193SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
4446861f193SBarry Smith const PetscScalar *x, *v, *alpha;
4456861f193SBarry Smith PetscScalar *y;
4466861f193SBarry Smith const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
4476861f193SBarry Smith PetscInt n, i, k;
4486861f193SBarry Smith
4496861f193SBarry Smith PetscFunctionBegin;
4509566063dSJacob Faibussowitsch if (yy != zz) PetscCall(VecCopy(yy, zz));
4519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x));
4529566063dSJacob Faibussowitsch PetscCall(VecGetArray(zz, &y));
4536861f193SBarry Smith for (i = 0; i < m; i++) {
4546861f193SBarry Smith idx = a->j + a->i[i];
4556861f193SBarry Smith v = a->a + a->i[i];
4566861f193SBarry Smith n = a->i[i + 1] - a->i[i];
4576861f193SBarry Smith alpha = x + dof * i;
4586861f193SBarry Smith while (n-- > 0) {
459ad540459SPierre Jolivet for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
4609371c9d4SSatish Balay idx++;
4619371c9d4SSatish Balay v++;
4626861f193SBarry Smith }
4636861f193SBarry Smith }
4649566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * dof * a->nz));
4659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x));
4669566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(zz, &y));
4673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4686861f193SBarry Smith }
4696861f193SBarry Smith
MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)47080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
471d71ae5a4SJacob Faibussowitsch {
4724cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
473f4a53059SBarry Smith
474b24ad042SBarry Smith PetscFunctionBegin;
475f4a53059SBarry Smith /* start the scatter */
4769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
4779566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
4789566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
4799566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
4803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
481f4a53059SBarry Smith }
482f4a53059SBarry Smith
MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)48380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
484d71ae5a4SJacob Faibussowitsch {
4854cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
486b24ad042SBarry Smith
4874cb9d9b8SBarry Smith PetscFunctionBegin;
4889566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
4899566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
4909566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
4919566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
4923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4934cb9d9b8SBarry Smith }
4944cb9d9b8SBarry Smith
MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)49580eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
496d71ae5a4SJacob Faibussowitsch {
4974cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
4984cb9d9b8SBarry Smith
499b24ad042SBarry Smith PetscFunctionBegin;
5004cb9d9b8SBarry Smith /* start the scatter */
5019566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
5029566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
5039566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
5049566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
5053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5064cb9d9b8SBarry Smith }
5074cb9d9b8SBarry Smith
MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)50880eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
509d71ae5a4SJacob Faibussowitsch {
5104cb9d9b8SBarry Smith Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
511b24ad042SBarry Smith
5124cb9d9b8SBarry Smith PetscFunctionBegin;
5139566063dSJacob Faibussowitsch PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
5149566063dSJacob Faibussowitsch PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
5159566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
5169566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
5173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5184cb9d9b8SBarry Smith }
5194cb9d9b8SBarry Smith
MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)52080eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
521d71ae5a4SJacob Faibussowitsch {
5224222ddf1SHong Zhang Mat_Product *product = C->product;
5234222ddf1SHong Zhang
5244222ddf1SHong Zhang PetscFunctionBegin;
525966bd95aSPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
5264222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
5273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5284222ddf1SHong Zhang }
5294222ddf1SHong Zhang
MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)53080eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
531d71ae5a4SJacob Faibussowitsch {
5324222ddf1SHong Zhang Mat_Product *product = C->product;
5334222ddf1SHong Zhang PetscBool flg = PETSC_FALSE;
5344222ddf1SHong Zhang Mat A = product->A, P = product->B;
5354222ddf1SHong Zhang PetscInt alg = 1; /* set default algorithm */
5364222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
5374222ddf1SHong Zhang const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
5384222ddf1SHong Zhang PetscInt nalg = 4;
5394222ddf1SHong Zhang #else
5404222ddf1SHong Zhang const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
5414222ddf1SHong Zhang PetscInt nalg = 5;
5424222ddf1SHong Zhang #endif
5434222ddf1SHong Zhang
5444222ddf1SHong Zhang PetscFunctionBegin;
54508401ef6SPierre Jolivet PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
5464222ddf1SHong Zhang
5474222ddf1SHong Zhang /* PtAP */
5484222ddf1SHong Zhang /* Check matrix local sizes */
5499371c9d4SSatish Balay PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5509371c9d4SSatish Balay A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
5519371c9d4SSatish Balay PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5529371c9d4SSatish Balay A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
5534222ddf1SHong Zhang
5544222ddf1SHong Zhang /* Set the default algorithm */
5559566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
556835f2295SStefano Zampini if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
5574222ddf1SHong Zhang
5584222ddf1SHong Zhang /* Get runtime option */
559d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
5609566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
561835f2295SStefano Zampini if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
562d0609cedSBarry Smith PetscOptionsEnd();
5634222ddf1SHong Zhang
5649566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
5654222ddf1SHong Zhang if (flg) {
5664222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
5673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5684222ddf1SHong Zhang }
5694222ddf1SHong Zhang
5709566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
5714222ddf1SHong Zhang if (flg) {
5724222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
5733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5744222ddf1SHong Zhang }
5754222ddf1SHong Zhang
5764222ddf1SHong Zhang /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
577835f2295SStefano Zampini PetscCall(PetscInfo(A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
5789566063dSJacob Faibussowitsch PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
5799566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions(C));
5803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5814222ddf1SHong Zhang }
5824222ddf1SHong Zhang
MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)58380eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
58480eb40d0SJacob Faibussowitsch {
58580eb40d0SJacob Faibussowitsch /* This routine requires testing -- first draft only */
58680eb40d0SJacob Faibussowitsch Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
58780eb40d0SJacob Faibussowitsch Mat P = pp->AIJ;
58880eb40d0SJacob Faibussowitsch Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
58980eb40d0SJacob Faibussowitsch Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data;
59080eb40d0SJacob Faibussowitsch Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
59180eb40d0SJacob Faibussowitsch const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
59280eb40d0SJacob Faibussowitsch const PetscInt *ci = c->i, *cj = c->j, *cjj;
59380eb40d0SJacob Faibussowitsch const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
59480eb40d0SJacob Faibussowitsch PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
59580eb40d0SJacob Faibussowitsch const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
59680eb40d0SJacob Faibussowitsch MatScalar *ca = c->a, *caj, *apa;
59780eb40d0SJacob Faibussowitsch
59880eb40d0SJacob Faibussowitsch PetscFunctionBegin;
59980eb40d0SJacob Faibussowitsch /* Allocate temporary array for storage of one row of A*P */
60080eb40d0SJacob Faibussowitsch PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
60180eb40d0SJacob Faibussowitsch
60280eb40d0SJacob Faibussowitsch /* Clear old values in C */
60380eb40d0SJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm]));
60480eb40d0SJacob Faibussowitsch
60580eb40d0SJacob Faibussowitsch for (i = 0; i < am; i++) {
60680eb40d0SJacob Faibussowitsch /* Form sparse row of A*P */
60780eb40d0SJacob Faibussowitsch anzi = ai[i + 1] - ai[i];
60880eb40d0SJacob Faibussowitsch apnzj = 0;
60980eb40d0SJacob Faibussowitsch for (j = 0; j < anzi; j++) {
61080eb40d0SJacob Faibussowitsch /* Get offset within block of P */
61180eb40d0SJacob Faibussowitsch pshift = *aj % ppdof;
61280eb40d0SJacob Faibussowitsch /* Get block row of P */
61380eb40d0SJacob Faibussowitsch prow = *aj++ / ppdof; /* integer division */
61480eb40d0SJacob Faibussowitsch pnzj = pi[prow + 1] - pi[prow];
61580eb40d0SJacob Faibussowitsch pjj = pj + pi[prow];
61680eb40d0SJacob Faibussowitsch paj = pa + pi[prow];
61780eb40d0SJacob Faibussowitsch for (k = 0; k < pnzj; k++) {
61880eb40d0SJacob Faibussowitsch poffset = pjj[k] * ppdof + pshift;
61980eb40d0SJacob Faibussowitsch if (!apjdense[poffset]) {
62080eb40d0SJacob Faibussowitsch apjdense[poffset] = -1;
62180eb40d0SJacob Faibussowitsch apj[apnzj++] = poffset;
62280eb40d0SJacob Faibussowitsch }
62380eb40d0SJacob Faibussowitsch apa[poffset] += (*aa) * paj[k];
62480eb40d0SJacob Faibussowitsch }
62580eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * pnzj));
62680eb40d0SJacob Faibussowitsch aa++;
62780eb40d0SJacob Faibussowitsch }
62880eb40d0SJacob Faibussowitsch
62980eb40d0SJacob Faibussowitsch /* Sort the j index array for quick sparse axpy. */
63080eb40d0SJacob Faibussowitsch /* Note: a array does not need sorting as it is in dense storage locations. */
63180eb40d0SJacob Faibussowitsch PetscCall(PetscSortInt(apnzj, apj));
63280eb40d0SJacob Faibussowitsch
63380eb40d0SJacob Faibussowitsch /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
63480eb40d0SJacob Faibussowitsch prow = i / ppdof; /* integer division */
63580eb40d0SJacob Faibussowitsch pshift = i % ppdof;
63680eb40d0SJacob Faibussowitsch poffset = pi[prow];
63780eb40d0SJacob Faibussowitsch pnzi = pi[prow + 1] - poffset;
63880eb40d0SJacob Faibussowitsch /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
63980eb40d0SJacob Faibussowitsch pJ = pj + poffset;
64080eb40d0SJacob Faibussowitsch pA = pa + poffset;
64180eb40d0SJacob Faibussowitsch for (j = 0; j < pnzi; j++) {
64280eb40d0SJacob Faibussowitsch crow = (*pJ) * ppdof + pshift;
64380eb40d0SJacob Faibussowitsch cjj = cj + ci[crow];
64480eb40d0SJacob Faibussowitsch caj = ca + ci[crow];
64580eb40d0SJacob Faibussowitsch pJ++;
64680eb40d0SJacob Faibussowitsch /* Perform sparse axpy operation. Note cjj includes apj. */
64780eb40d0SJacob Faibussowitsch for (k = 0, nextap = 0; nextap < apnzj; k++) {
64880eb40d0SJacob Faibussowitsch if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
64980eb40d0SJacob Faibussowitsch }
65080eb40d0SJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * apnzj));
65180eb40d0SJacob Faibussowitsch pA++;
65280eb40d0SJacob Faibussowitsch }
65380eb40d0SJacob Faibussowitsch
65480eb40d0SJacob Faibussowitsch /* Zero the current row info for A*P */
65580eb40d0SJacob Faibussowitsch for (j = 0; j < apnzj; j++) {
65680eb40d0SJacob Faibussowitsch apa[apj[j]] = 0.;
65780eb40d0SJacob Faibussowitsch apjdense[apj[j]] = 0;
65880eb40d0SJacob Faibussowitsch }
65980eb40d0SJacob Faibussowitsch }
66080eb40d0SJacob Faibussowitsch
66180eb40d0SJacob Faibussowitsch /* Assemble the final matrix and clean up */
66280eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
66380eb40d0SJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
66480eb40d0SJacob Faibussowitsch PetscCall(PetscFree3(apa, apj, apjdense));
66580eb40d0SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
66680eb40d0SJacob Faibussowitsch }
66780eb40d0SJacob Faibussowitsch
MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)66880eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
669d71ae5a4SJacob Faibussowitsch {
6700298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL;
6717ba1a0bfSKris Buschelman Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
6727ba1a0bfSKris Buschelman Mat P = pp->AIJ;
6737ba1a0bfSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
674d2840e09SBarry Smith PetscInt *pti, *ptj, *ptJ;
6757ba1a0bfSKris Buschelman PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
676d2840e09SBarry Smith const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
677d2840e09SBarry Smith PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
6787ba1a0bfSKris Buschelman MatScalar *ca;
679d2840e09SBarry Smith const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
6807ba1a0bfSKris Buschelman
6817ba1a0bfSKris Buschelman PetscFunctionBegin;
6827ba1a0bfSKris Buschelman /* Get ij structure of P^T */
6839566063dSJacob Faibussowitsch PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
6847ba1a0bfSKris Buschelman
6857ba1a0bfSKris Buschelman cn = pn * ppdof;
6867ba1a0bfSKris Buschelman /* Allocate ci array, arrays for fill computation and */
6877ba1a0bfSKris Buschelman /* free space for accumulating nonzero column info */
6889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cn + 1, &ci));
6897ba1a0bfSKris Buschelman ci[0] = 0;
6907ba1a0bfSKris Buschelman
6917ba1a0bfSKris Buschelman /* Work arrays for rows of P^T*A */
6929566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
6939566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ptadenserow, an));
6949566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(denserow, cn));
6957ba1a0bfSKris Buschelman
6967ba1a0bfSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
6977ba1a0bfSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */
6987ba1a0bfSKris Buschelman /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
6999566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
7007ba1a0bfSKris Buschelman current_space = free_space;
7017ba1a0bfSKris Buschelman
7027ba1a0bfSKris Buschelman /* Determine symbolic info for each row of C: */
7037ba1a0bfSKris Buschelman for (i = 0; i < pn; i++) {
7047ba1a0bfSKris Buschelman ptnzi = pti[i + 1] - pti[i];
7057ba1a0bfSKris Buschelman ptJ = ptj + pti[i];
7067ba1a0bfSKris Buschelman for (dof = 0; dof < ppdof; dof++) {
7077ba1a0bfSKris Buschelman ptanzi = 0;
7087ba1a0bfSKris Buschelman /* Determine symbolic row of PtA: */
7097ba1a0bfSKris Buschelman for (j = 0; j < ptnzi; j++) {
7107ba1a0bfSKris Buschelman /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
7117ba1a0bfSKris Buschelman arow = ptJ[j] * ppdof + dof;
7127ba1a0bfSKris Buschelman /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
7137ba1a0bfSKris Buschelman anzj = ai[arow + 1] - ai[arow];
7147ba1a0bfSKris Buschelman ajj = aj + ai[arow];
7157ba1a0bfSKris Buschelman for (k = 0; k < anzj; k++) {
7167ba1a0bfSKris Buschelman if (!ptadenserow[ajj[k]]) {
7177ba1a0bfSKris Buschelman ptadenserow[ajj[k]] = -1;
7187ba1a0bfSKris Buschelman ptasparserow[ptanzi++] = ajj[k];
7197ba1a0bfSKris Buschelman }
7207ba1a0bfSKris Buschelman }
7217ba1a0bfSKris Buschelman }
7227ba1a0bfSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
7237ba1a0bfSKris Buschelman ptaj = ptasparserow;
7247ba1a0bfSKris Buschelman cnzi = 0;
7257ba1a0bfSKris Buschelman for (j = 0; j < ptanzi; j++) {
7267ba1a0bfSKris Buschelman /* Get offset within block of P */
7277ba1a0bfSKris Buschelman pshift = *ptaj % ppdof;
7287ba1a0bfSKris Buschelman /* Get block row of P */
7297ba1a0bfSKris Buschelman prow = (*ptaj++) / ppdof; /* integer division */
7307ba1a0bfSKris Buschelman /* P has same number of nonzeros per row as the compressed form */
7317ba1a0bfSKris Buschelman pnzj = pi[prow + 1] - pi[prow];
7327ba1a0bfSKris Buschelman pjj = pj + pi[prow];
7337ba1a0bfSKris Buschelman for (k = 0; k < pnzj; k++) {
7347ba1a0bfSKris Buschelman /* Locations in C are shifted by the offset within the block */
7357ba1a0bfSKris Buschelman /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
7367ba1a0bfSKris Buschelman if (!denserow[pjj[k] * ppdof + pshift]) {
7377ba1a0bfSKris Buschelman denserow[pjj[k] * ppdof + pshift] = -1;
7387ba1a0bfSKris Buschelman sparserow[cnzi++] = pjj[k] * ppdof + pshift;
7397ba1a0bfSKris Buschelman }
7407ba1a0bfSKris Buschelman }
7417ba1a0bfSKris Buschelman }
7427ba1a0bfSKris Buschelman
7437ba1a0bfSKris Buschelman /* sort sparserow */
7449566063dSJacob Faibussowitsch PetscCall(PetscSortInt(cnzi, sparserow));
7457ba1a0bfSKris Buschelman
7467ba1a0bfSKris Buschelman /* If free space is not available, make more free space */
7477ba1a0bfSKris Buschelman /* Double the amount of total space in the list */
74848a46eb9SPierre Jolivet if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space));
7497ba1a0bfSKris Buschelman
7507ba1a0bfSKris Buschelman /* Copy data into free space, and zero out denserows */
7519566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
75226fbe8dcSKarl Rupp
7537ba1a0bfSKris Buschelman current_space->array += cnzi;
7547ba1a0bfSKris Buschelman current_space->local_used += cnzi;
7557ba1a0bfSKris Buschelman current_space->local_remaining -= cnzi;
7567ba1a0bfSKris Buschelman
75726fbe8dcSKarl Rupp for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
75826fbe8dcSKarl Rupp for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
75926fbe8dcSKarl Rupp
7607ba1a0bfSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */
7617ba1a0bfSKris Buschelman /* For now, we will recompute what is needed. */
7627ba1a0bfSKris Buschelman ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
7637ba1a0bfSKris Buschelman }
7647ba1a0bfSKris Buschelman }
7657ba1a0bfSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */
7667ba1a0bfSKris Buschelman /* Allocate space for cj, initialize cj, and */
7677ba1a0bfSKris Buschelman /* destroy list of free space and other temporary array(s) */
768*84648c2dSPierre Jolivet PetscCall(PetscMalloc1(ci[cn], &cj));
7699566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
7709566063dSJacob Faibussowitsch PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
7717ba1a0bfSKris Buschelman
7727ba1a0bfSKris Buschelman /* Allocate space for ca */
773*84648c2dSPierre Jolivet PetscCall(PetscCalloc1(ci[cn], &ca));
7747ba1a0bfSKris Buschelman
7757ba1a0bfSKris Buschelman /* put together the new matrix */
7769566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
7779566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(C, pp->dof));
7787ba1a0bfSKris Buschelman
7797ba1a0bfSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
7807ba1a0bfSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */
781f4f49eeaSPierre Jolivet c = (Mat_SeqAIJ *)C->data;
782e6b907acSBarry Smith c->free_a = PETSC_TRUE;
783e6b907acSBarry Smith c->free_ij = PETSC_TRUE;
7847ba1a0bfSKris Buschelman c->nonew = 0;
78526fbe8dcSKarl Rupp
7864222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
7874222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP;
7887ba1a0bfSKris Buschelman
7897ba1a0bfSKris Buschelman /* Clean up. */
7909566063dSJacob Faibussowitsch PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
7913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
7927ba1a0bfSKris Buschelman }
7937ba1a0bfSKris Buschelman
MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)794d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
795d71ae5a4SJacob Faibussowitsch {
7964222ddf1SHong Zhang Mat_Product *product = C->product;
7974222ddf1SHong Zhang Mat A = product->A, P = product->B;
7982121bac1SHong Zhang
7992121bac1SHong Zhang PetscFunctionBegin;
8009566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
8013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
8022121bac1SHong Zhang }
8032121bac1SHong Zhang
804bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
805bc8e477aSFande Kong
MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)806d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
807d71ae5a4SJacob Faibussowitsch {
808bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
809bc8e477aSFande Kong
810bc8e477aSFande Kong PetscFunctionBegin;
8119566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
8123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
813bc8e477aSFande Kong }
814bc8e477aSFande Kong
8154222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
816bc8e477aSFande Kong
MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)817d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
818d71ae5a4SJacob Faibussowitsch {
819bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
820bc8e477aSFande Kong
821bc8e477aSFande Kong PetscFunctionBegin;
8229566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
8234222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
8243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
825bc8e477aSFande Kong }
826bc8e477aSFande Kong
827bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
828bc8e477aSFande Kong
MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)829d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
830d71ae5a4SJacob Faibussowitsch {
831bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
832bc8e477aSFande Kong
833bc8e477aSFande Kong PetscFunctionBegin;
8349566063dSJacob Faibussowitsch PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
8353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
836bc8e477aSFande Kong }
837bc8e477aSFande Kong
8384222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
839bc8e477aSFande Kong
MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)840d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
841d71ae5a4SJacob Faibussowitsch {
842bc8e477aSFande Kong Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
843bc8e477aSFande Kong
844bc8e477aSFande Kong PetscFunctionBegin;
8459566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
8464222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
8473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
848bc8e477aSFande Kong }
849bc8e477aSFande Kong
MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)850d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
851d71ae5a4SJacob Faibussowitsch {
8524222ddf1SHong Zhang Mat_Product *product = C->product;
8534222ddf1SHong Zhang Mat A = product->A, P = product->B;
8544222ddf1SHong Zhang PetscBool flg;
855bc8e477aSFande Kong
856bc8e477aSFande Kong PetscFunctionBegin;
8579566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
8584222ddf1SHong Zhang if (flg) {
8599566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
8604222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP;
8613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
862bc8e477aSFande Kong }
86334bcad68SFande Kong
8649566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
865966bd95aSPierre Jolivet PetscCheck(flg, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
8669566063dSJacob Faibussowitsch PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
8674222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP;
8683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
8694222ddf1SHong Zhang }
87034bcad68SFande Kong
MatConvert_SeqMAIJ_SeqAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)871d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
872d71ae5a4SJacob Faibussowitsch {
8739c4fc4c7SBarry Smith Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
874ceb03754SKris Buschelman Mat a = b->AIJ, B;
8759c4fc4c7SBarry Smith Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data;
8760fd73130SBarry Smith PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
877ba8c8a56SBarry Smith PetscInt *cols;
878ba8c8a56SBarry Smith PetscScalar *vals;
8799c4fc4c7SBarry Smith
8809c4fc4c7SBarry Smith PetscFunctionBegin;
8819566063dSJacob Faibussowitsch PetscCall(MatGetSize(a, &m, &n));
8829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(dof * m, &ilen));
8839c4fc4c7SBarry Smith for (i = 0; i < m; i++) {
8849c4fc4c7SBarry Smith nmax = PetscMax(nmax, aij->ilen[i]);
88526fbe8dcSKarl Rupp for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
8869c4fc4c7SBarry Smith }
8879566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B));
8889566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
8899566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype));
8909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
8919566063dSJacob Faibussowitsch PetscCall(PetscFree(ilen));
8929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmax, &icols));
8939c4fc4c7SBarry Smith ii = 0;
8949c4fc4c7SBarry Smith for (i = 0; i < m; i++) {
8959566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
8960fd73130SBarry Smith for (j = 0; j < dof; j++) {
89726fbe8dcSKarl Rupp for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
8989566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
8999c4fc4c7SBarry Smith ii++;
9009c4fc4c7SBarry Smith }
9019566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
9029c4fc4c7SBarry Smith }
9039566063dSJacob Faibussowitsch PetscCall(PetscFree(icols));
9049566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
9059566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
906ceb03754SKris Buschelman
907511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) {
9089566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B));
909c3d102feSKris Buschelman } else {
910ceb03754SKris Buschelman *newmat = B;
911c3d102feSKris Buschelman }
9123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
9139c4fc4c7SBarry Smith }
9149c4fc4c7SBarry Smith
915c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
916be1d678aSKris Buschelman
MatConvert_MPIMAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)917d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
918d71ae5a4SJacob Faibussowitsch {
9190fd73130SBarry Smith Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data;
920ceb03754SKris Buschelman Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
9210fd73130SBarry Smith Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
9220fd73130SBarry Smith Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data;
9230fd73130SBarry Smith Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data;
9240fd73130SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data;
9250298fd71SBarry Smith PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
9260298fd71SBarry Smith PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
9270fd73130SBarry Smith PetscInt rstart, cstart, *garray, ii, k;
9280fd73130SBarry Smith PetscScalar *vals, *ovals;
9290fd73130SBarry Smith
9300fd73130SBarry Smith PetscFunctionBegin;
9319566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
932d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) {
9330fd73130SBarry Smith nmax = PetscMax(nmax, AIJ->ilen[i]);
9340fd73130SBarry Smith onmax = PetscMax(onmax, OAIJ->ilen[i]);
9350fd73130SBarry Smith for (j = 0; j < dof; j++) {
9360fd73130SBarry Smith dnz[dof * i + j] = AIJ->ilen[i];
9370fd73130SBarry Smith onz[dof * i + j] = OAIJ->ilen[i];
9380fd73130SBarry Smith }
9390fd73130SBarry Smith }
9409566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
9419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
9429566063dSJacob Faibussowitsch PetscCall(MatSetType(B, newtype));
9439566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
9449566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, dof));
9459566063dSJacob Faibussowitsch PetscCall(PetscFree2(dnz, onz));
9460fd73130SBarry Smith
9479566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
948d0f46423SBarry Smith rstart = dof * maij->A->rmap->rstart;
949d0f46423SBarry Smith cstart = dof * maij->A->cmap->rstart;
9500fd73130SBarry Smith garray = mpiaij->garray;
9510fd73130SBarry Smith
9520fd73130SBarry Smith ii = rstart;
953d0f46423SBarry Smith for (i = 0; i < A->rmap->n / dof; i++) {
9549566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
9559566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
9560fd73130SBarry Smith for (j = 0; j < dof; j++) {
957ad540459SPierre Jolivet for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
958ad540459SPierre Jolivet for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
9599566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
9609566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
9610fd73130SBarry Smith ii++;
9620fd73130SBarry Smith }
9639566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
9649566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
9650fd73130SBarry Smith }
9669566063dSJacob Faibussowitsch PetscCall(PetscFree2(icols, oicols));
9670fd73130SBarry Smith
9689566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
9699566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
970ceb03754SKris Buschelman
971511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) {
9727adad957SLisandro Dalcin PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
9737adad957SLisandro Dalcin ((PetscObject)A)->refct = 1;
97426fbe8dcSKarl Rupp
9759566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B));
97626fbe8dcSKarl Rupp
9777adad957SLisandro Dalcin ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
978c3d102feSKris Buschelman } else {
979ceb03754SKris Buschelman *newmat = B;
980c3d102feSKris Buschelman }
9813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
9820fd73130SBarry Smith }
9830fd73130SBarry Smith
MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat * newmat)98480eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
985d71ae5a4SJacob Faibussowitsch {
9869e516c8fSBarry Smith Mat A;
9879e516c8fSBarry Smith
9889e516c8fSBarry Smith PetscFunctionBegin;
9899566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
9909566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
9919566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A));
9923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
9939e516c8fSBarry Smith }
9940fd73130SBarry Smith
MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * submat[])99580eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
996d71ae5a4SJacob Faibussowitsch {
997ec626240SStefano Zampini Mat A;
998ec626240SStefano Zampini
999ec626240SStefano Zampini PetscFunctionBegin;
10009566063dSJacob Faibussowitsch PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
10019566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
10029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A));
10033ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1004ec626240SStefano Zampini }
1005ec626240SStefano Zampini
1006480dffcfSJed Brown /*@
10070bad9183SKris Buschelman MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
10080bad9183SKris Buschelman operations for multicomponent problems. It interpolates each component the same
100911a5261eSBarry Smith way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
101011a5261eSBarry Smith and `MATMPIAIJ` for distributed matrices.
10110bad9183SKris Buschelman
1012ff585edeSJed Brown Collective
1013ff585edeSJed Brown
1014ff585edeSJed Brown Input Parameters:
101511a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks
1016ff585edeSJed Brown - dof - the block size (number of components per node)
1017ff585edeSJed Brown
1018ff585edeSJed Brown Output Parameter:
101911a5261eSBarry Smith . maij - the new `MATMAIJ` matrix
1020ff585edeSJed Brown
10212ef1f0ffSBarry Smith Level: advanced
10222ef1f0ffSBarry Smith
1023fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1024ff585edeSJed Brown @*/
MatCreateMAIJ(Mat A,PetscInt dof,Mat * maij)1025d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1026d71ae5a4SJacob Faibussowitsch {
1027b24ad042SBarry Smith PetscInt n;
102882b1193eSBarry Smith Mat B;
102990f67b22SBarry Smith PetscBool flg;
1030fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
1031fdc842d1SBarry Smith /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1032fdc842d1SBarry Smith PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1033fdc842d1SBarry Smith #endif
103482b1193eSBarry Smith
103582b1193eSBarry Smith PetscFunctionBegin;
1036fdc842d1SBarry Smith dof = PetscAbs(dof);
10379566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A));
1038d72c5c08SBarry Smith
103926fbe8dcSKarl Rupp if (dof == 1) *maij = A;
104026fbe8dcSKarl Rupp else {
10419566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1042c248e2fdSStefano Zampini /* propagate vec type */
10439566063dSJacob Faibussowitsch PetscCall(MatSetVecType(B, A->defaultvectype));
10449566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
10459566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
10469566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
10479566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap));
10489566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap));
104926fbe8dcSKarl Rupp
1050cd3bbe55SBarry Smith B->assembled = PETSC_TRUE;
1051d72c5c08SBarry Smith
105290f67b22SBarry Smith PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
105390f67b22SBarry Smith if (flg) {
1054feb9fe23SJed Brown Mat_SeqMAIJ *b;
1055feb9fe23SJed Brown
10569566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQMAIJ));
105726fbe8dcSKarl Rupp
10580298fd71SBarry Smith B->ops->setup = NULL;
10594cb9d9b8SBarry Smith B->ops->destroy = MatDestroy_SeqMAIJ;
10600fd73130SBarry Smith B->ops->view = MatView_SeqMAIJ;
10614222ddf1SHong Zhang
1062feb9fe23SJed Brown b = (Mat_SeqMAIJ *)B->data;
1063b9b97703SBarry Smith b->dof = dof;
10644cb9d9b8SBarry Smith b->AIJ = A;
106526fbe8dcSKarl Rupp
1066d72c5c08SBarry Smith if (dof == 2) {
1067cd3bbe55SBarry Smith B->ops->mult = MatMult_SeqMAIJ_2;
1068cd3bbe55SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_2;
1069cd3bbe55SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
1070cd3bbe55SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1071bcc973b7SBarry Smith } else if (dof == 3) {
1072bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_3;
1073bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_3;
1074bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
1075bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1076bcc973b7SBarry Smith } else if (dof == 4) {
1077bcc973b7SBarry Smith B->ops->mult = MatMult_SeqMAIJ_4;
1078bcc973b7SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_4;
1079bcc973b7SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
1080bcc973b7SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1081f9fae5adSBarry Smith } else if (dof == 5) {
1082f9fae5adSBarry Smith B->ops->mult = MatMult_SeqMAIJ_5;
1083f9fae5adSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_5;
1084f9fae5adSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
1085f9fae5adSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
10866cd98798SBarry Smith } else if (dof == 6) {
10876cd98798SBarry Smith B->ops->mult = MatMult_SeqMAIJ_6;
10886cd98798SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_6;
10896cd98798SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
10906cd98798SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1091ed8eea03SBarry Smith } else if (dof == 7) {
1092ed8eea03SBarry Smith B->ops->mult = MatMult_SeqMAIJ_7;
1093ed8eea03SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_7;
1094ed8eea03SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
1095ed8eea03SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
109666ed3db0SBarry Smith } else if (dof == 8) {
109766ed3db0SBarry Smith B->ops->mult = MatMult_SeqMAIJ_8;
109866ed3db0SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_8;
109966ed3db0SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
110066ed3db0SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
11012b692628SMatthew Knepley } else if (dof == 9) {
11022b692628SMatthew Knepley B->ops->mult = MatMult_SeqMAIJ_9;
11032b692628SMatthew Knepley B->ops->multadd = MatMultAdd_SeqMAIJ_9;
11042b692628SMatthew Knepley B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
11052b692628SMatthew Knepley B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1106b9cda22cSBarry Smith } else if (dof == 10) {
1107b9cda22cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_10;
1108b9cda22cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_10;
1109b9cda22cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
1110b9cda22cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1111dbdd7285SBarry Smith } else if (dof == 11) {
1112dbdd7285SBarry Smith B->ops->mult = MatMult_SeqMAIJ_11;
1113dbdd7285SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_11;
1114dbdd7285SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
1115dbdd7285SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
11162f7816d4SBarry Smith } else if (dof == 16) {
11172f7816d4SBarry Smith B->ops->mult = MatMult_SeqMAIJ_16;
11182f7816d4SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_16;
11192f7816d4SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
11202f7816d4SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1121ed1c418cSBarry Smith } else if (dof == 18) {
1122ed1c418cSBarry Smith B->ops->mult = MatMult_SeqMAIJ_18;
1123ed1c418cSBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_18;
1124ed1c418cSBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
1125ed1c418cSBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
112682b1193eSBarry Smith } else {
11276861f193SBarry Smith B->ops->mult = MatMult_SeqMAIJ_N;
11286861f193SBarry Smith B->ops->multadd = MatMultAdd_SeqMAIJ_N;
11296861f193SBarry Smith B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
11306861f193SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
113182b1193eSBarry Smith }
1132fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
11339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1134fdc842d1SBarry Smith #endif
11359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
11369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1137f4a53059SBarry Smith } else {
1138f4a53059SBarry Smith Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1139feb9fe23SJed Brown Mat_MPIMAIJ *b;
1140f4a53059SBarry Smith IS from, to;
1141f4a53059SBarry Smith Vec gvec;
1142f4a53059SBarry Smith
11439566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIMAIJ));
114426fbe8dcSKarl Rupp
11450298fd71SBarry Smith B->ops->setup = NULL;
1146d72c5c08SBarry Smith B->ops->destroy = MatDestroy_MPIMAIJ;
11470fd73130SBarry Smith B->ops->view = MatView_MPIMAIJ;
114826fbe8dcSKarl Rupp
1149b9b97703SBarry Smith b = (Mat_MPIMAIJ *)B->data;
1150b9b97703SBarry Smith b->dof = dof;
1151b9b97703SBarry Smith b->A = A;
115226fbe8dcSKarl Rupp
11539566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
11549566063dSJacob Faibussowitsch PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
11554cb9d9b8SBarry Smith
11569566063dSJacob Faibussowitsch PetscCall(VecGetSize(mpiaij->lvec, &n));
11579566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
11589566063dSJacob Faibussowitsch PetscCall(VecSetSizes(b->w, n * dof, n * dof));
11599566063dSJacob Faibussowitsch PetscCall(VecSetBlockSize(b->w, dof));
11609566063dSJacob Faibussowitsch PetscCall(VecSetType(b->w, VECSEQ));
1161f4a53059SBarry Smith
1162f4a53059SBarry Smith /* create two temporary Index sets for build scatter gather */
11639566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
11649566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
1165f4a53059SBarry Smith
1166f4a53059SBarry Smith /* create temporary global vector to generate scatter context */
11679566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
1168f4a53059SBarry Smith
1169f4a53059SBarry Smith /* generate the scatter context */
11709566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
1171f4a53059SBarry Smith
11729566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from));
11739566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to));
11749566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec));
1175f4a53059SBarry Smith
1176f4a53059SBarry Smith B->ops->mult = MatMult_MPIMAIJ_dof;
11774cb9d9b8SBarry Smith B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
11784cb9d9b8SBarry Smith B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
11794cb9d9b8SBarry Smith B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
118026fbe8dcSKarl Rupp
1181fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
11829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1183fdc842d1SBarry Smith #endif
11849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
11859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1186f4a53059SBarry Smith }
11877dae84e0SHong Zhang B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
1188ec626240SStefano Zampini B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
11899566063dSJacob Faibussowitsch PetscCall(MatSetUp(B));
1190fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
1191fdc842d1SBarry Smith /* temporary until we have CUDA implementation of MAIJ */
1192fdc842d1SBarry Smith {
1193fdc842d1SBarry Smith PetscBool flg;
1194fdc842d1SBarry Smith if (convert) {
11959566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
119648a46eb9SPierre Jolivet if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1197fdc842d1SBarry Smith }
1198fdc842d1SBarry Smith }
1199fdc842d1SBarry Smith #endif
1200cd3bbe55SBarry Smith *maij = B;
1201d72c5c08SBarry Smith }
12023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
120382b1193eSBarry Smith }
1204