xref: /petsc/src/mat/impls/maij/maij.c (revision 5ff6d247539c86491dc822dc7e845e819e6cc4a3)
1c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
2c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
382b1193eSBarry Smith 
4b350b9acSSatish Balay /*@
511a5261eSBarry Smith   MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
6ff585edeSJed Brown 
711a5261eSBarry Smith   Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
8ff585edeSJed Brown 
9ff585edeSJed Brown   Input Parameter:
1011a5261eSBarry Smith . A - the `MATMAIJ` matrix
11ff585edeSJed Brown 
12ff585edeSJed Brown   Output Parameter:
1311a5261eSBarry Smith . B - the `MATAIJ` matrix
14ff585edeSJed Brown 
15ff585edeSJed Brown   Level: advanced
16ff585edeSJed Brown 
1711a5261eSBarry Smith   Note:
1811a5261eSBarry Smith   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
19ff585edeSJed Brown 
201cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
21ff585edeSJed Brown @*/
MatMAIJGetAIJ(Mat A,Mat * B)22d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
23d71ae5a4SJacob Faibussowitsch {
24ace3abfcSBarry Smith   PetscBool ismpimaij, isseqmaij;
25b9b97703SBarry Smith 
26b9b97703SBarry Smith   PetscFunctionBegin;
279566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
289566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29b9b97703SBarry Smith   if (ismpimaij) {
30b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
31b9b97703SBarry Smith 
32b9b97703SBarry Smith     *B = b->A;
33b9b97703SBarry Smith   } else if (isseqmaij) {
34b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
35b9b97703SBarry Smith 
36b9b97703SBarry Smith     *B = b->AIJ;
37b9b97703SBarry Smith   } else {
38b9b97703SBarry Smith     *B = A;
39b9b97703SBarry Smith   }
403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41b9b97703SBarry Smith }
42b9b97703SBarry Smith 
43480dffcfSJed Brown /*@
4411a5261eSBarry Smith   MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
45ff585edeSJed Brown 
463f9fe445SBarry Smith   Logically Collective
47ff585edeSJed Brown 
48d8d19677SJose E. Roman   Input Parameters:
4911a5261eSBarry Smith + A   - the `MATMAIJ` matrix
50ff585edeSJed Brown - dof - the block size for the new matrix
51ff585edeSJed Brown 
52ff585edeSJed Brown   Output Parameter:
5311a5261eSBarry Smith . B - the new `MATMAIJ` matrix
54ff585edeSJed Brown 
55ff585edeSJed Brown   Level: advanced
56ff585edeSJed Brown 
571cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
58ff585edeSJed Brown @*/
MatMAIJRedimension(Mat A,PetscInt dof,Mat * B)59d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
60d71ae5a4SJacob Faibussowitsch {
610298fd71SBarry Smith   Mat Aij = NULL;
62b9b97703SBarry Smith 
63b9b97703SBarry Smith   PetscFunctionBegin;
64c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A, dof, 2);
659566063dSJacob Faibussowitsch   PetscCall(MatMAIJGetAIJ(A, &Aij));
669566063dSJacob Faibussowitsch   PetscCall(MatCreateMAIJ(Aij, dof, B));
673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
68b9b97703SBarry Smith }
69b9b97703SBarry Smith 
MatDestroy_SeqMAIJ(Mat A)7080eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
71d71ae5a4SJacob Faibussowitsch {
724cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
7382b1193eSBarry Smith 
7482b1193eSBarry Smith   PetscFunctionBegin;
759566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
769566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
772e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
814cb9d9b8SBarry Smith }
824cb9d9b8SBarry Smith 
MatSetUp_MAIJ(Mat A)8380eb40d0SJacob Faibussowitsch static PetscErrorCode MatSetUp_MAIJ(Mat A)
84d71ae5a4SJacob Faibussowitsch {
85356c569eSBarry Smith   PetscFunctionBegin;
86ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
87356c569eSBarry Smith }
88356c569eSBarry Smith 
MatView_SeqMAIJ(Mat A,PetscViewer viewer)8980eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
90d71ae5a4SJacob Faibussowitsch {
910fd73130SBarry Smith   Mat B;
920fd73130SBarry Smith 
930fd73130SBarry Smith   PetscFunctionBegin;
949566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
959566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
969566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
980fd73130SBarry Smith }
990fd73130SBarry Smith 
MatView_MPIMAIJ(Mat A,PetscViewer viewer)10080eb40d0SJacob Faibussowitsch static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
101d71ae5a4SJacob Faibussowitsch {
1020fd73130SBarry Smith   Mat B;
1030fd73130SBarry Smith 
1040fd73130SBarry Smith   PetscFunctionBegin;
1059566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
1069566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
1079566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1090fd73130SBarry Smith }
1100fd73130SBarry Smith 
MatDestroy_MPIMAIJ(Mat A)11180eb40d0SJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
112d71ae5a4SJacob Faibussowitsch {
1134cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
1144cb9d9b8SBarry Smith 
1154cb9d9b8SBarry Smith   PetscFunctionBegin;
1169566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
1179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->OAIJ));
1189566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->A));
1199566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->ctx));
1209566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->w));
1219566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1222e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
1239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
1249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
1259566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
127b9b97703SBarry Smith }
128b9b97703SBarry Smith 
1290bad9183SKris Buschelman /*MC
130fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1310bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
13211a5261eSBarry Smith   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
1330bad9183SKris Buschelman 
1340bad9183SKris Buschelman   Operations provided:
13567b8a455SSatish Balay .vb
13611a5261eSBarry Smith     MatMult()
13711a5261eSBarry Smith     MatMultTranspose()
13811a5261eSBarry Smith     MatMultAdd()
13911a5261eSBarry Smith     MatMultTransposeAdd()
14067b8a455SSatish Balay .ve
1410bad9183SKris Buschelman 
1420bad9183SKris Buschelman   Level: advanced
1430bad9183SKris Buschelman 
1441cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
1450bad9183SKris Buschelman M*/
1460bad9183SKris Buschelman 
MatCreate_MAIJ(Mat A)147d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
148d71ae5a4SJacob Faibussowitsch {
1494cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
15064b52464SHong Zhang   PetscMPIInt  size;
15182b1193eSBarry Smith 
15282b1193eSBarry Smith   PetscFunctionBegin;
1534dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
154b0a32e0cSBarry Smith   A->data = (void *)b;
15526fbe8dcSKarl Rupp 
1569566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
15726fbe8dcSKarl Rupp 
158356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
159f4a53059SBarry Smith 
160f4259b30SLisandro Dalcin   b->AIJ  = NULL;
161cd3bbe55SBarry Smith   b->dof  = 0;
162f4259b30SLisandro Dalcin   b->OAIJ = NULL;
163f4259b30SLisandro Dalcin   b->ctx  = NULL;
164f4259b30SLisandro Dalcin   b->w    = NULL;
1659566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
16664b52464SHong Zhang   if (size == 1) {
1679566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
16864b52464SHong Zhang   } else {
1699566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
17064b52464SHong Zhang   }
17132e7c8b0SBarry Smith   A->preallocated = PETSC_TRUE;
17232e7c8b0SBarry Smith   A->assembled    = PETSC_TRUE;
1733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17482b1193eSBarry Smith }
17582b1193eSBarry Smith 
17680eb40d0SJacob Faibussowitsch #if PetscHasAttribute(always_inline)
17780eb40d0SJacob Faibussowitsch   #define PETSC_FORCE_INLINE __attribute__((always_inline))
17880eb40d0SJacob Faibussowitsch #else
17980eb40d0SJacob Faibussowitsch   #define PETSC_FORCE_INLINE
18080eb40d0SJacob Faibussowitsch #endif
181bb5b24f5SJacob Faibussowitsch 
1829927e305SJacob Faibussowitsch #if defined(__clang__)
183bb5b24f5SJacob Faibussowitsch   #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
184bb5b24f5SJacob Faibussowitsch #else
185bb5b24f5SJacob Faibussowitsch   #define PETSC_PRAGMA_UNROLL
186bb5b24f5SJacob Faibussowitsch #endif
187bb5b24f5SJacob Faibussowitsch 
18880eb40d0SJacob Faibussowitsch enum {
18980eb40d0SJacob Faibussowitsch   MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
19080eb40d0SJacob Faibussowitsch };
19180eb40d0SJacob Faibussowitsch 
19280eb40d0SJacob Faibussowitsch // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
19380eb40d0SJacob Faibussowitsch // keyword into account for these...
MatMult_MatMultAdd_SeqMAIJ_Template(Mat A,Vec xx,Vec yy,Vec zz,int N)19480eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
19580eb40d0SJacob Faibussowitsch {
19680eb40d0SJacob Faibussowitsch   const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
19780eb40d0SJacob Faibussowitsch   const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
19880eb40d0SJacob Faibussowitsch   const Mat          baij       = b->AIJ;
19980eb40d0SJacob Faibussowitsch   const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
20080eb40d0SJacob Faibussowitsch   const PetscInt     m          = baij->rmap->n;
20180eb40d0SJacob Faibussowitsch   const PetscInt     nz         = a->nz;
20280eb40d0SJacob Faibussowitsch   const PetscInt    *idx        = a->j;
20380eb40d0SJacob Faibussowitsch   const PetscInt    *ii         = a->i;
20480eb40d0SJacob Faibussowitsch   const PetscScalar *v          = a->a;
20580eb40d0SJacob Faibussowitsch   PetscInt           nonzerorow = 0;
20680eb40d0SJacob Faibussowitsch   const PetscScalar *x;
20780eb40d0SJacob Faibussowitsch   PetscScalar       *z;
20880eb40d0SJacob Faibussowitsch 
20980eb40d0SJacob Faibussowitsch   PetscFunctionBegin;
21080eb40d0SJacob Faibussowitsch   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
21180eb40d0SJacob Faibussowitsch   if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
21280eb40d0SJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
21380eb40d0SJacob Faibussowitsch   if (mult_add) {
21480eb40d0SJacob Faibussowitsch     PetscCall(VecGetArray(zz, &z));
21580eb40d0SJacob Faibussowitsch   } else {
21680eb40d0SJacob Faibussowitsch     PetscCall(VecGetArrayWrite(zz, &z));
21780eb40d0SJacob Faibussowitsch   }
21880eb40d0SJacob Faibussowitsch 
21980eb40d0SJacob Faibussowitsch   for (PetscInt i = 0; i < m; ++i) {
22080eb40d0SJacob Faibussowitsch     PetscInt       jrow = ii[i];
22180eb40d0SJacob Faibussowitsch     const PetscInt n    = ii[i + 1] - jrow;
22280eb40d0SJacob Faibussowitsch     // leave a line so clang-format does not align these decls
22380eb40d0SJacob Faibussowitsch     PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};
22480eb40d0SJacob Faibussowitsch 
22580eb40d0SJacob Faibussowitsch     nonzerorow += n > 0;
22680eb40d0SJacob Faibussowitsch     for (PetscInt j = 0; j < n; ++j, ++jrow) {
22780eb40d0SJacob Faibussowitsch       const PetscScalar v_jrow     = v[jrow];
22880eb40d0SJacob Faibussowitsch       const PetscInt    N_idx_jrow = N * idx[jrow];
22980eb40d0SJacob Faibussowitsch 
230bb5b24f5SJacob Faibussowitsch       PETSC_PRAGMA_UNROLL
23180eb40d0SJacob Faibussowitsch       for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
23280eb40d0SJacob Faibussowitsch     }
23380eb40d0SJacob Faibussowitsch 
234bb5b24f5SJacob Faibussowitsch     PETSC_PRAGMA_UNROLL
23580eb40d0SJacob Faibussowitsch     for (int k = 0; k < N; ++k) {
23680eb40d0SJacob Faibussowitsch       const PetscInt z_idx = N * i + k;
23780eb40d0SJacob Faibussowitsch 
23880eb40d0SJacob Faibussowitsch       if (mult_add) {
23980eb40d0SJacob Faibussowitsch         z[z_idx] += sum[k];
24080eb40d0SJacob Faibussowitsch       } else {
24180eb40d0SJacob Faibussowitsch         z[z_idx] = sum[k];
24280eb40d0SJacob Faibussowitsch       }
24380eb40d0SJacob Faibussowitsch     }
24480eb40d0SJacob Faibussowitsch   }
24580eb40d0SJacob Faibussowitsch   PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
24680eb40d0SJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
24780eb40d0SJacob Faibussowitsch   if (mult_add) {
24880eb40d0SJacob Faibussowitsch     PetscCall(VecRestoreArray(zz, &z));
24980eb40d0SJacob Faibussowitsch   } else {
25080eb40d0SJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(zz, &z));
25180eb40d0SJacob Faibussowitsch   }
25280eb40d0SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25380eb40d0SJacob Faibussowitsch }
25480eb40d0SJacob Faibussowitsch 
MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A,Vec xx,Vec yy,Vec zz,int N)25580eb40d0SJacob Faibussowitsch PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
25680eb40d0SJacob Faibussowitsch {
25780eb40d0SJacob Faibussowitsch   const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
25880eb40d0SJacob Faibussowitsch   const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
25980eb40d0SJacob Faibussowitsch   const Mat          baij     = b->AIJ;
26080eb40d0SJacob Faibussowitsch   const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
26180eb40d0SJacob Faibussowitsch   const PetscInt     m        = baij->rmap->n;
26280eb40d0SJacob Faibussowitsch   const PetscInt     nz       = a->nz;
26380eb40d0SJacob Faibussowitsch   const PetscInt    *a_j      = a->j;
26480eb40d0SJacob Faibussowitsch   const PetscInt    *a_i      = a->i;
26580eb40d0SJacob Faibussowitsch   const PetscScalar *a_a      = a->a;
26680eb40d0SJacob Faibussowitsch   const PetscScalar *x;
26780eb40d0SJacob Faibussowitsch   PetscScalar       *z;
26880eb40d0SJacob Faibussowitsch 
26980eb40d0SJacob Faibussowitsch   PetscFunctionBegin;
27080eb40d0SJacob Faibussowitsch   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
27180eb40d0SJacob Faibussowitsch   if (mult_add) {
27280eb40d0SJacob Faibussowitsch     if (yy != zz) PetscCall(VecCopy(yy, zz));
27380eb40d0SJacob Faibussowitsch   } else {
27480eb40d0SJacob Faibussowitsch     PetscCall(VecSet(zz, 0.0));
27580eb40d0SJacob Faibussowitsch   }
27680eb40d0SJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
27780eb40d0SJacob Faibussowitsch   PetscCall(VecGetArray(zz, &z));
27880eb40d0SJacob Faibussowitsch 
27980eb40d0SJacob Faibussowitsch   for (PetscInt i = 0; i < m; i++) {
28080eb40d0SJacob Faibussowitsch     const PetscInt     a_ii = a_i[i];
2818e3a54c0SPierre Jolivet     const PetscInt    *idx  = PetscSafePointerPlusOffset(a_j, a_ii);
2828e3a54c0SPierre Jolivet     const PetscScalar *v    = PetscSafePointerPlusOffset(a_a, a_ii);
28380eb40d0SJacob Faibussowitsch     const PetscInt     n    = a_i[i + 1] - a_ii;
28480eb40d0SJacob Faibussowitsch     PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];
28580eb40d0SJacob Faibussowitsch 
286bb5b24f5SJacob Faibussowitsch     PETSC_PRAGMA_UNROLL
28780eb40d0SJacob Faibussowitsch     for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
28880eb40d0SJacob Faibussowitsch     for (PetscInt j = 0; j < n; ++j) {
28980eb40d0SJacob Faibussowitsch       const PetscInt    N_idx_j = N * idx[j];
29080eb40d0SJacob Faibussowitsch       const PetscScalar v_j     = v[j];
29180eb40d0SJacob Faibussowitsch 
292bb5b24f5SJacob Faibussowitsch       PETSC_PRAGMA_UNROLL
29380eb40d0SJacob Faibussowitsch       for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
29480eb40d0SJacob Faibussowitsch     }
29580eb40d0SJacob Faibussowitsch   }
29680eb40d0SJacob Faibussowitsch 
29780eb40d0SJacob Faibussowitsch   PetscCall(PetscLogFlops(2 * N * nz));
29880eb40d0SJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
29980eb40d0SJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &z));
30080eb40d0SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
30180eb40d0SJacob Faibussowitsch }
30280eb40d0SJacob Faibussowitsch 
3039927e305SJacob Faibussowitsch #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
3049927e305SJacob Faibussowitsch   static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
3059927e305SJacob Faibussowitsch   { \
3069927e305SJacob Faibussowitsch     PetscFunctionBegin; \
3079927e305SJacob Faibussowitsch     PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
3089927e305SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS); \
3099927e305SJacob Faibussowitsch   } \
3109927e305SJacob Faibussowitsch   static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
3119927e305SJacob Faibussowitsch   { \
3129927e305SJacob Faibussowitsch     PetscFunctionBegin; \
3139927e305SJacob Faibussowitsch     PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
3149927e305SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS); \
3159927e305SJacob Faibussowitsch   } \
3169927e305SJacob Faibussowitsch   static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
3179927e305SJacob Faibussowitsch   { \
3189927e305SJacob Faibussowitsch     PetscFunctionBegin; \
3199927e305SJacob Faibussowitsch     PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
3209927e305SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS); \
3219927e305SJacob Faibussowitsch   } \
3229927e305SJacob Faibussowitsch   static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
3239927e305SJacob Faibussowitsch   { \
3249927e305SJacob Faibussowitsch     PetscFunctionBegin; \
3259927e305SJacob Faibussowitsch     PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
3269927e305SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS); \
32782b1193eSBarry Smith   }
328bcc973b7SBarry Smith 
3299927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
3309927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
3319927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
3329927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
3339927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
3349927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
3359927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
3369927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
3379927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
3389927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
3399927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
3409927e305SJacob Faibussowitsch MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
341bcc973b7SBarry Smith 
3429927e305SJacob Faibussowitsch #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
343ed1c418cSBarry Smith 
MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)34480eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
345d71ae5a4SJacob Faibussowitsch {
3466861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
3476861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
3486861f193SBarry Smith   const PetscScalar *x, *v;
3496861f193SBarry Smith   PetscScalar       *y, *sums;
3506861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
3516861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
3526861f193SBarry Smith 
3536861f193SBarry Smith   PetscFunctionBegin;
3549566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3559566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
3569566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
3576861f193SBarry Smith   idx = a->j;
3586861f193SBarry Smith   v   = a->a;
3596861f193SBarry Smith   ii  = a->i;
3606861f193SBarry Smith 
3616861f193SBarry Smith   for (i = 0; i < m; i++) {
3626861f193SBarry Smith     jrow = ii[i];
3636861f193SBarry Smith     n    = ii[i + 1] - jrow;
3646861f193SBarry Smith     sums = y + dof * i;
3656861f193SBarry Smith     for (j = 0; j < n; j++) {
366ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
3676861f193SBarry Smith       jrow++;
3686861f193SBarry Smith     }
3696861f193SBarry Smith   }
3706861f193SBarry Smith 
3719566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
3729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
3743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3756861f193SBarry Smith }
3766861f193SBarry Smith 
MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)37780eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
378d71ae5a4SJacob Faibussowitsch {
3796861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
3806861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
3816861f193SBarry Smith   const PetscScalar *x, *v;
3826861f193SBarry Smith   PetscScalar       *y, *sums;
3836861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
3846861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
3856861f193SBarry Smith 
3866861f193SBarry Smith   PetscFunctionBegin;
3879566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
3889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3899566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
3906861f193SBarry Smith   idx = a->j;
3916861f193SBarry Smith   v   = a->a;
3926861f193SBarry Smith   ii  = a->i;
3936861f193SBarry Smith 
3946861f193SBarry Smith   for (i = 0; i < m; i++) {
3956861f193SBarry Smith     jrow = ii[i];
3966861f193SBarry Smith     n    = ii[i + 1] - jrow;
3976861f193SBarry Smith     sums = y + dof * i;
3986861f193SBarry Smith     for (j = 0; j < n; j++) {
399ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
4006861f193SBarry Smith       jrow++;
4016861f193SBarry Smith     }
4026861f193SBarry Smith   }
4036861f193SBarry Smith 
4049566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
4059566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
4073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4086861f193SBarry Smith }
4096861f193SBarry Smith 
MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)41080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
411d71ae5a4SJacob Faibussowitsch {
4126861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
4136861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
4146861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
4156861f193SBarry Smith   PetscScalar       *y;
4166861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
4176861f193SBarry Smith   PetscInt           n, i, k;
4186861f193SBarry Smith 
4196861f193SBarry Smith   PetscFunctionBegin;
4209566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4219566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
4229566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
4236861f193SBarry Smith   for (i = 0; i < m; i++) {
4248e3a54c0SPierre Jolivet     idx   = PetscSafePointerPlusOffset(a->j, a->i[i]);
4258e3a54c0SPierre Jolivet     v     = PetscSafePointerPlusOffset(a->a, a->i[i]);
4266861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
4276861f193SBarry Smith     alpha = x + dof * i;
4286861f193SBarry Smith     while (n-- > 0) {
429ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
4309371c9d4SSatish Balay       idx++;
4319371c9d4SSatish Balay       v++;
4326861f193SBarry Smith     }
4336861f193SBarry Smith   }
4349566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
4359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4386861f193SBarry Smith }
4396861f193SBarry Smith 
MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)44080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
441d71ae5a4SJacob Faibussowitsch {
4426861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
4436861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
4446861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
4456861f193SBarry Smith   PetscScalar       *y;
4466861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
4476861f193SBarry Smith   PetscInt           n, i, k;
4486861f193SBarry Smith 
4496861f193SBarry Smith   PetscFunctionBegin;
4509566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
4519566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4529566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
4536861f193SBarry Smith   for (i = 0; i < m; i++) {
4546861f193SBarry Smith     idx   = a->j + a->i[i];
4556861f193SBarry Smith     v     = a->a + a->i[i];
4566861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
4576861f193SBarry Smith     alpha = x + dof * i;
4586861f193SBarry Smith     while (n-- > 0) {
459ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
4609371c9d4SSatish Balay       idx++;
4619371c9d4SSatish Balay       v++;
4626861f193SBarry Smith     }
4636861f193SBarry Smith   }
4649566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
4659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
4673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4686861f193SBarry Smith }
4696861f193SBarry Smith 
MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)47080eb40d0SJacob Faibussowitsch static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
471d71ae5a4SJacob Faibussowitsch {
4724cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
473f4a53059SBarry Smith 
474b24ad042SBarry Smith   PetscFunctionBegin;
475f4a53059SBarry Smith   /* start the scatter */
4769566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
4779566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
4789566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
4799566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
4803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
481f4a53059SBarry Smith }
482f4a53059SBarry Smith 
MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)48380eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
484d71ae5a4SJacob Faibussowitsch {
4854cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
486b24ad042SBarry Smith 
4874cb9d9b8SBarry Smith   PetscFunctionBegin;
4889566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
4899566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
4909566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
4919566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
4923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4934cb9d9b8SBarry Smith }
4944cb9d9b8SBarry Smith 
MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)49580eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
496d71ae5a4SJacob Faibussowitsch {
4974cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
4984cb9d9b8SBarry Smith 
499b24ad042SBarry Smith   PetscFunctionBegin;
5004cb9d9b8SBarry Smith   /* start the scatter */
5019566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
5029566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
5039566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
5049566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
5053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5064cb9d9b8SBarry Smith }
5074cb9d9b8SBarry Smith 
MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)50880eb40d0SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
509d71ae5a4SJacob Faibussowitsch {
5104cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
511b24ad042SBarry Smith 
5124cb9d9b8SBarry Smith   PetscFunctionBegin;
5139566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
5149566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
5159566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
5169566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
5173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5184cb9d9b8SBarry Smith }
5194cb9d9b8SBarry Smith 
MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)52080eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
521d71ae5a4SJacob Faibussowitsch {
5224222ddf1SHong Zhang   Mat_Product *product = C->product;
5234222ddf1SHong Zhang 
5244222ddf1SHong Zhang   PetscFunctionBegin;
525966bd95aSPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
5264222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
5273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5284222ddf1SHong Zhang }
5294222ddf1SHong Zhang 
MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)53080eb40d0SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
531d71ae5a4SJacob Faibussowitsch {
5324222ddf1SHong Zhang   Mat_Product *product = C->product;
5334222ddf1SHong Zhang   PetscBool    flg     = PETSC_FALSE;
5344222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
5354222ddf1SHong Zhang   PetscInt     alg = 1; /* set default algorithm */
5364222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
5374222ddf1SHong Zhang   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
5384222ddf1SHong Zhang   PetscInt    nalg        = 4;
5394222ddf1SHong Zhang #else
5404222ddf1SHong Zhang   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
5414222ddf1SHong Zhang   PetscInt    nalg        = 5;
5424222ddf1SHong Zhang #endif
5434222ddf1SHong Zhang 
5444222ddf1SHong Zhang   PetscFunctionBegin;
54508401ef6SPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
5464222ddf1SHong Zhang 
5474222ddf1SHong Zhang   /* PtAP */
5484222ddf1SHong Zhang   /* Check matrix local sizes */
5499371c9d4SSatish Balay   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5509371c9d4SSatish Balay              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
5519371c9d4SSatish Balay   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5529371c9d4SSatish Balay              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
5534222ddf1SHong Zhang 
5544222ddf1SHong Zhang   /* Set the default algorithm */
5559566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
556835f2295SStefano Zampini   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
5574222ddf1SHong Zhang 
5584222ddf1SHong Zhang   /* Get runtime option */
559d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
5609566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
561835f2295SStefano Zampini   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
562d0609cedSBarry Smith   PetscOptionsEnd();
5634222ddf1SHong Zhang 
5649566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
5654222ddf1SHong Zhang   if (flg) {
5664222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
5673ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
5684222ddf1SHong Zhang   }
5694222ddf1SHong Zhang 
5709566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
5714222ddf1SHong Zhang   if (flg) {
5724222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
5733ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
5744222ddf1SHong Zhang   }
5754222ddf1SHong Zhang 
5764222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
577835f2295SStefano Zampini   PetscCall(PetscInfo(A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
5789566063dSJacob Faibussowitsch   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
5799566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(C));
5803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5814222ddf1SHong Zhang }
5824222ddf1SHong Zhang 
MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)58380eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
58480eb40d0SJacob Faibussowitsch {
58580eb40d0SJacob Faibussowitsch   /* This routine requires testing -- first draft only */
58680eb40d0SJacob Faibussowitsch   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
58780eb40d0SJacob Faibussowitsch   Mat              P  = pp->AIJ;
58880eb40d0SJacob Faibussowitsch   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
58980eb40d0SJacob Faibussowitsch   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
59080eb40d0SJacob Faibussowitsch   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
59180eb40d0SJacob Faibussowitsch   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
59280eb40d0SJacob Faibussowitsch   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
59380eb40d0SJacob Faibussowitsch   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
59480eb40d0SJacob Faibussowitsch   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
59580eb40d0SJacob Faibussowitsch   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
59680eb40d0SJacob Faibussowitsch   MatScalar       *ca = c->a, *caj, *apa;
59780eb40d0SJacob Faibussowitsch 
59880eb40d0SJacob Faibussowitsch   PetscFunctionBegin;
59980eb40d0SJacob Faibussowitsch   /* Allocate temporary array for storage of one row of A*P */
60080eb40d0SJacob Faibussowitsch   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
60180eb40d0SJacob Faibussowitsch 
60280eb40d0SJacob Faibussowitsch   /* Clear old values in C */
60380eb40d0SJacob Faibussowitsch   PetscCall(PetscArrayzero(ca, ci[cm]));
60480eb40d0SJacob Faibussowitsch 
60580eb40d0SJacob Faibussowitsch   for (i = 0; i < am; i++) {
60680eb40d0SJacob Faibussowitsch     /* Form sparse row of A*P */
60780eb40d0SJacob Faibussowitsch     anzi  = ai[i + 1] - ai[i];
60880eb40d0SJacob Faibussowitsch     apnzj = 0;
60980eb40d0SJacob Faibussowitsch     for (j = 0; j < anzi; j++) {
61080eb40d0SJacob Faibussowitsch       /* Get offset within block of P */
61180eb40d0SJacob Faibussowitsch       pshift = *aj % ppdof;
61280eb40d0SJacob Faibussowitsch       /* Get block row of P */
61380eb40d0SJacob Faibussowitsch       prow = *aj++ / ppdof; /* integer division */
61480eb40d0SJacob Faibussowitsch       pnzj = pi[prow + 1] - pi[prow];
61580eb40d0SJacob Faibussowitsch       pjj  = pj + pi[prow];
61680eb40d0SJacob Faibussowitsch       paj  = pa + pi[prow];
61780eb40d0SJacob Faibussowitsch       for (k = 0; k < pnzj; k++) {
61880eb40d0SJacob Faibussowitsch         poffset = pjj[k] * ppdof + pshift;
61980eb40d0SJacob Faibussowitsch         if (!apjdense[poffset]) {
62080eb40d0SJacob Faibussowitsch           apjdense[poffset] = -1;
62180eb40d0SJacob Faibussowitsch           apj[apnzj++]      = poffset;
62280eb40d0SJacob Faibussowitsch         }
62380eb40d0SJacob Faibussowitsch         apa[poffset] += (*aa) * paj[k];
62480eb40d0SJacob Faibussowitsch       }
62580eb40d0SJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * pnzj));
62680eb40d0SJacob Faibussowitsch       aa++;
62780eb40d0SJacob Faibussowitsch     }
62880eb40d0SJacob Faibussowitsch 
62980eb40d0SJacob Faibussowitsch     /* Sort the j index array for quick sparse axpy. */
63080eb40d0SJacob Faibussowitsch     /* Note: a array does not need sorting as it is in dense storage locations. */
63180eb40d0SJacob Faibussowitsch     PetscCall(PetscSortInt(apnzj, apj));
63280eb40d0SJacob Faibussowitsch 
63380eb40d0SJacob Faibussowitsch     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
63480eb40d0SJacob Faibussowitsch     prow    = i / ppdof; /* integer division */
63580eb40d0SJacob Faibussowitsch     pshift  = i % ppdof;
63680eb40d0SJacob Faibussowitsch     poffset = pi[prow];
63780eb40d0SJacob Faibussowitsch     pnzi    = pi[prow + 1] - poffset;
63880eb40d0SJacob Faibussowitsch     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
63980eb40d0SJacob Faibussowitsch     pJ = pj + poffset;
64080eb40d0SJacob Faibussowitsch     pA = pa + poffset;
64180eb40d0SJacob Faibussowitsch     for (j = 0; j < pnzi; j++) {
64280eb40d0SJacob Faibussowitsch       crow = (*pJ) * ppdof + pshift;
64380eb40d0SJacob Faibussowitsch       cjj  = cj + ci[crow];
64480eb40d0SJacob Faibussowitsch       caj  = ca + ci[crow];
64580eb40d0SJacob Faibussowitsch       pJ++;
64680eb40d0SJacob Faibussowitsch       /* Perform sparse axpy operation.  Note cjj includes apj. */
64780eb40d0SJacob Faibussowitsch       for (k = 0, nextap = 0; nextap < apnzj; k++) {
64880eb40d0SJacob Faibussowitsch         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
64980eb40d0SJacob Faibussowitsch       }
65080eb40d0SJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * apnzj));
65180eb40d0SJacob Faibussowitsch       pA++;
65280eb40d0SJacob Faibussowitsch     }
65380eb40d0SJacob Faibussowitsch 
65480eb40d0SJacob Faibussowitsch     /* Zero the current row info for A*P */
65580eb40d0SJacob Faibussowitsch     for (j = 0; j < apnzj; j++) {
65680eb40d0SJacob Faibussowitsch       apa[apj[j]]      = 0.;
65780eb40d0SJacob Faibussowitsch       apjdense[apj[j]] = 0;
65880eb40d0SJacob Faibussowitsch     }
65980eb40d0SJacob Faibussowitsch   }
66080eb40d0SJacob Faibussowitsch 
66180eb40d0SJacob Faibussowitsch   /* Assemble the final matrix and clean up */
66280eb40d0SJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
66380eb40d0SJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
66480eb40d0SJacob Faibussowitsch   PetscCall(PetscFree3(apa, apj, apjdense));
66580eb40d0SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
66680eb40d0SJacob Faibussowitsch }
66780eb40d0SJacob Faibussowitsch 
MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)66880eb40d0SJacob Faibussowitsch static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
669d71ae5a4SJacob Faibussowitsch {
6700298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
6717ba1a0bfSKris Buschelman   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
6727ba1a0bfSKris Buschelman   Mat                P  = pp->AIJ;
6737ba1a0bfSKris Buschelman   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
674d2840e09SBarry Smith   PetscInt          *pti, *ptj, *ptJ;
6757ba1a0bfSKris Buschelman   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
676d2840e09SBarry Smith   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
677d2840e09SBarry Smith   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
6787ba1a0bfSKris Buschelman   MatScalar         *ca;
679d2840e09SBarry Smith   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
6807ba1a0bfSKris Buschelman 
6817ba1a0bfSKris Buschelman   PetscFunctionBegin;
6827ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
6839566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
6847ba1a0bfSKris Buschelman 
6857ba1a0bfSKris Buschelman   cn = pn * ppdof;
6867ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
6877ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
6889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cn + 1, &ci));
6897ba1a0bfSKris Buschelman   ci[0] = 0;
6907ba1a0bfSKris Buschelman 
6917ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
6929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
6939566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ptadenserow, an));
6949566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(denserow, cn));
6957ba1a0bfSKris Buschelman 
6967ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
6977ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
6987ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
6999566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
7007ba1a0bfSKris Buschelman   current_space = free_space;
7017ba1a0bfSKris Buschelman 
7027ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
7037ba1a0bfSKris Buschelman   for (i = 0; i < pn; i++) {
7047ba1a0bfSKris Buschelman     ptnzi = pti[i + 1] - pti[i];
7057ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
7067ba1a0bfSKris Buschelman     for (dof = 0; dof < ppdof; dof++) {
7077ba1a0bfSKris Buschelman       ptanzi = 0;
7087ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
7097ba1a0bfSKris Buschelman       for (j = 0; j < ptnzi; j++) {
7107ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
7117ba1a0bfSKris Buschelman         arow = ptJ[j] * ppdof + dof;
7127ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
7137ba1a0bfSKris Buschelman         anzj = ai[arow + 1] - ai[arow];
7147ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
7157ba1a0bfSKris Buschelman         for (k = 0; k < anzj; k++) {
7167ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
7177ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
7187ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
7197ba1a0bfSKris Buschelman           }
7207ba1a0bfSKris Buschelman         }
7217ba1a0bfSKris Buschelman       }
7227ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
7237ba1a0bfSKris Buschelman       ptaj = ptasparserow;
7247ba1a0bfSKris Buschelman       cnzi = 0;
7257ba1a0bfSKris Buschelman       for (j = 0; j < ptanzi; j++) {
7267ba1a0bfSKris Buschelman         /* Get offset within block of P */
7277ba1a0bfSKris Buschelman         pshift = *ptaj % ppdof;
7287ba1a0bfSKris Buschelman         /* Get block row of P */
7297ba1a0bfSKris Buschelman         prow = (*ptaj++) / ppdof; /* integer division */
7307ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
7317ba1a0bfSKris Buschelman         pnzj = pi[prow + 1] - pi[prow];
7327ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
7337ba1a0bfSKris Buschelman         for (k = 0; k < pnzj; k++) {
7347ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
7357ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
7367ba1a0bfSKris Buschelman           if (!denserow[pjj[k] * ppdof + pshift]) {
7377ba1a0bfSKris Buschelman             denserow[pjj[k] * ppdof + pshift] = -1;
7387ba1a0bfSKris Buschelman             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
7397ba1a0bfSKris Buschelman           }
7407ba1a0bfSKris Buschelman         }
7417ba1a0bfSKris Buschelman       }
7427ba1a0bfSKris Buschelman 
7437ba1a0bfSKris Buschelman       /* sort sparserow */
7449566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(cnzi, sparserow));
7457ba1a0bfSKris Buschelman 
7467ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
7477ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
74848a46eb9SPierre Jolivet       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
7497ba1a0bfSKris Buschelman 
7507ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
7519566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
75226fbe8dcSKarl Rupp 
7537ba1a0bfSKris Buschelman       current_space->array += cnzi;
7547ba1a0bfSKris Buschelman       current_space->local_used += cnzi;
7557ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
7567ba1a0bfSKris Buschelman 
75726fbe8dcSKarl Rupp       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
75826fbe8dcSKarl Rupp       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
75926fbe8dcSKarl Rupp 
7607ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
7617ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
7627ba1a0bfSKris Buschelman       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
7637ba1a0bfSKris Buschelman     }
7647ba1a0bfSKris Buschelman   }
7657ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
7667ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
7677ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
768*84648c2dSPierre Jolivet   PetscCall(PetscMalloc1(ci[cn], &cj));
7699566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
7709566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
7717ba1a0bfSKris Buschelman 
7727ba1a0bfSKris Buschelman   /* Allocate space for ca */
773*84648c2dSPierre Jolivet   PetscCall(PetscCalloc1(ci[cn], &ca));
7747ba1a0bfSKris Buschelman 
7757ba1a0bfSKris Buschelman   /* put together the new matrix */
7769566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
7779566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(C, pp->dof));
7787ba1a0bfSKris Buschelman 
7797ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
7807ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
781f4f49eeaSPierre Jolivet   c          = (Mat_SeqAIJ *)C->data;
782e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
783e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
7847ba1a0bfSKris Buschelman   c->nonew   = 0;
78526fbe8dcSKarl Rupp 
7864222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
7874222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
7887ba1a0bfSKris Buschelman 
7897ba1a0bfSKris Buschelman   /* Clean up. */
7909566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
7913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7927ba1a0bfSKris Buschelman }
7937ba1a0bfSKris Buschelman 
MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)794d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
795d71ae5a4SJacob Faibussowitsch {
7964222ddf1SHong Zhang   Mat_Product *product = C->product;
7974222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
7982121bac1SHong Zhang 
7992121bac1SHong Zhang   PetscFunctionBegin;
8009566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
8013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8022121bac1SHong Zhang }
8032121bac1SHong Zhang 
804bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
805bc8e477aSFande Kong 
MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)806d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
807d71ae5a4SJacob Faibussowitsch {
808bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
809bc8e477aSFande Kong 
810bc8e477aSFande Kong   PetscFunctionBegin;
8119566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
8123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
813bc8e477aSFande Kong }
814bc8e477aSFande Kong 
8154222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
816bc8e477aSFande Kong 
MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)817d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
818d71ae5a4SJacob Faibussowitsch {
819bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
820bc8e477aSFande Kong 
821bc8e477aSFande Kong   PetscFunctionBegin;
8229566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
8234222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
8243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
825bc8e477aSFande Kong }
826bc8e477aSFande Kong 
827bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
828bc8e477aSFande Kong 
MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)829d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
830d71ae5a4SJacob Faibussowitsch {
831bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
832bc8e477aSFande Kong 
833bc8e477aSFande Kong   PetscFunctionBegin;
8349566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
8353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
836bc8e477aSFande Kong }
837bc8e477aSFande Kong 
8384222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
839bc8e477aSFande Kong 
MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)840d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
841d71ae5a4SJacob Faibussowitsch {
842bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
843bc8e477aSFande Kong 
844bc8e477aSFande Kong   PetscFunctionBegin;
8459566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
8464222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
8473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
848bc8e477aSFande Kong }
849bc8e477aSFande Kong 
MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)850d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
851d71ae5a4SJacob Faibussowitsch {
8524222ddf1SHong Zhang   Mat_Product *product = C->product;
8534222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
8544222ddf1SHong Zhang   PetscBool    flg;
855bc8e477aSFande Kong 
856bc8e477aSFande Kong   PetscFunctionBegin;
8579566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
8584222ddf1SHong Zhang   if (flg) {
8599566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
8604222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
8613ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
862bc8e477aSFande Kong   }
86334bcad68SFande Kong 
8649566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
865966bd95aSPierre Jolivet   PetscCheck(flg, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
8669566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
8674222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
8683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8694222ddf1SHong Zhang }
87034bcad68SFande Kong 
MatConvert_SeqMAIJ_SeqAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)871d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
872d71ae5a4SJacob Faibussowitsch {
8739c4fc4c7SBarry Smith   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
874ceb03754SKris Buschelman   Mat          a   = b->AIJ, B;
8759c4fc4c7SBarry Smith   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
8760fd73130SBarry Smith   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
877ba8c8a56SBarry Smith   PetscInt    *cols;
878ba8c8a56SBarry Smith   PetscScalar *vals;
8799c4fc4c7SBarry Smith 
8809c4fc4c7SBarry Smith   PetscFunctionBegin;
8819566063dSJacob Faibussowitsch   PetscCall(MatGetSize(a, &m, &n));
8829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(dof * m, &ilen));
8839c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
8849c4fc4c7SBarry Smith     nmax = PetscMax(nmax, aij->ilen[i]);
88526fbe8dcSKarl Rupp     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
8869c4fc4c7SBarry Smith   }
8879566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
8889566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
8899566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
8909566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
8919566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilen));
8929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmax, &icols));
8939c4fc4c7SBarry Smith   ii = 0;
8949c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
8959566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
8960fd73130SBarry Smith     for (j = 0; j < dof; j++) {
89726fbe8dcSKarl Rupp       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
8989566063dSJacob Faibussowitsch       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
8999c4fc4c7SBarry Smith       ii++;
9009c4fc4c7SBarry Smith     }
9019566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
9029c4fc4c7SBarry Smith   }
9039566063dSJacob Faibussowitsch   PetscCall(PetscFree(icols));
9049566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
9059566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
906ceb03754SKris Buschelman 
907511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
9089566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
909c3d102feSKris Buschelman   } else {
910ceb03754SKris Buschelman     *newmat = B;
911c3d102feSKris Buschelman   }
9123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9139c4fc4c7SBarry Smith }
9149c4fc4c7SBarry Smith 
915c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
916be1d678aSKris Buschelman 
MatConvert_MPIMAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)917d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
918d71ae5a4SJacob Faibussowitsch {
9190fd73130SBarry Smith   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
920ceb03754SKris Buschelman   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
9210fd73130SBarry Smith   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
9220fd73130SBarry Smith   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
9230fd73130SBarry Smith   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
9240fd73130SBarry Smith   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
9250298fd71SBarry Smith   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
9260298fd71SBarry Smith   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
9270fd73130SBarry Smith   PetscInt     rstart, cstart, *garray, ii, k;
9280fd73130SBarry Smith   PetscScalar *vals, *ovals;
9290fd73130SBarry Smith 
9300fd73130SBarry Smith   PetscFunctionBegin;
9319566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
932d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
9330fd73130SBarry Smith     nmax  = PetscMax(nmax, AIJ->ilen[i]);
9340fd73130SBarry Smith     onmax = PetscMax(onmax, OAIJ->ilen[i]);
9350fd73130SBarry Smith     for (j = 0; j < dof; j++) {
9360fd73130SBarry Smith       dnz[dof * i + j] = AIJ->ilen[i];
9370fd73130SBarry Smith       onz[dof * i + j] = OAIJ->ilen[i];
9380fd73130SBarry Smith     }
9390fd73130SBarry Smith   }
9409566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
9419566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
9429566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
9439566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
9449566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, dof));
9459566063dSJacob Faibussowitsch   PetscCall(PetscFree2(dnz, onz));
9460fd73130SBarry Smith 
9479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
948d0f46423SBarry Smith   rstart = dof * maij->A->rmap->rstart;
949d0f46423SBarry Smith   cstart = dof * maij->A->cmap->rstart;
9500fd73130SBarry Smith   garray = mpiaij->garray;
9510fd73130SBarry Smith 
9520fd73130SBarry Smith   ii = rstart;
953d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
9549566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
9559566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
9560fd73130SBarry Smith     for (j = 0; j < dof; j++) {
957ad540459SPierre Jolivet       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
958ad540459SPierre Jolivet       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
9599566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
9609566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
9610fd73130SBarry Smith       ii++;
9620fd73130SBarry Smith     }
9639566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
9649566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
9650fd73130SBarry Smith   }
9669566063dSJacob Faibussowitsch   PetscCall(PetscFree2(icols, oicols));
9670fd73130SBarry Smith 
9689566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
9699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
970ceb03754SKris Buschelman 
971511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
9727adad957SLisandro Dalcin     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
9737adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
97426fbe8dcSKarl Rupp 
9759566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
97626fbe8dcSKarl Rupp 
9777adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
978c3d102feSKris Buschelman   } else {
979ceb03754SKris Buschelman     *newmat = B;
980c3d102feSKris Buschelman   }
9813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9820fd73130SBarry Smith }
9830fd73130SBarry Smith 
MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat * newmat)98480eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
985d71ae5a4SJacob Faibussowitsch {
9869e516c8fSBarry Smith   Mat A;
9879e516c8fSBarry Smith 
9889e516c8fSBarry Smith   PetscFunctionBegin;
9899566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
9909566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
9919566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
9923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9939e516c8fSBarry Smith }
9940fd73130SBarry Smith 
MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * submat[])99580eb40d0SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
996d71ae5a4SJacob Faibussowitsch {
997ec626240SStefano Zampini   Mat A;
998ec626240SStefano Zampini 
999ec626240SStefano Zampini   PetscFunctionBegin;
10009566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
10019566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
10029566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
10033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1004ec626240SStefano Zampini }
1005ec626240SStefano Zampini 
1006480dffcfSJed Brown /*@
10070bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
10080bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
100911a5261eSBarry Smith   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
101011a5261eSBarry Smith   and `MATMPIAIJ` for distributed matrices.
10110bad9183SKris Buschelman 
1012ff585edeSJed Brown   Collective
1013ff585edeSJed Brown 
1014ff585edeSJed Brown   Input Parameters:
101511a5261eSBarry Smith + A   - the `MATAIJ` matrix describing the action on blocks
1016ff585edeSJed Brown - dof - the block size (number of components per node)
1017ff585edeSJed Brown 
1018ff585edeSJed Brown   Output Parameter:
101911a5261eSBarry Smith . maij - the new `MATMAIJ` matrix
1020ff585edeSJed Brown 
10212ef1f0ffSBarry Smith   Level: advanced
10222ef1f0ffSBarry Smith 
1023fe59aa6dSJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1024ff585edeSJed Brown @*/
MatCreateMAIJ(Mat A,PetscInt dof,Mat * maij)1025d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1026d71ae5a4SJacob Faibussowitsch {
1027b24ad042SBarry Smith   PetscInt  n;
102882b1193eSBarry Smith   Mat       B;
102990f67b22SBarry Smith   PetscBool flg;
1030fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
1031fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1032fdc842d1SBarry Smith   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1033fdc842d1SBarry Smith #endif
103482b1193eSBarry Smith 
103582b1193eSBarry Smith   PetscFunctionBegin;
1036fdc842d1SBarry Smith   dof = PetscAbs(dof);
10379566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)A));
1038d72c5c08SBarry Smith 
103926fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
104026fbe8dcSKarl Rupp   else {
10419566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1042c248e2fdSStefano Zampini     /* propagate vec type */
10439566063dSJacob Faibussowitsch     PetscCall(MatSetVecType(B, A->defaultvectype));
10449566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
10459566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
10469566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
10479566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
10489566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
104926fbe8dcSKarl Rupp 
1050cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
1051d72c5c08SBarry Smith 
105290f67b22SBarry Smith     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
105390f67b22SBarry Smith     if (flg) {
1054feb9fe23SJed Brown       Mat_SeqMAIJ *b;
1055feb9fe23SJed Brown 
10569566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATSEQMAIJ));
105726fbe8dcSKarl Rupp 
10580298fd71SBarry Smith       B->ops->setup   = NULL;
10594cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
10600fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
10614222ddf1SHong Zhang 
1062feb9fe23SJed Brown       b      = (Mat_SeqMAIJ *)B->data;
1063b9b97703SBarry Smith       b->dof = dof;
10644cb9d9b8SBarry Smith       b->AIJ = A;
106526fbe8dcSKarl Rupp 
1066d72c5c08SBarry Smith       if (dof == 2) {
1067cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
1068cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1069cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1070cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1071bcc973b7SBarry Smith       } else if (dof == 3) {
1072bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
1073bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1074bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1075bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1076bcc973b7SBarry Smith       } else if (dof == 4) {
1077bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
1078bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1079bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1080bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1081f9fae5adSBarry Smith       } else if (dof == 5) {
1082f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
1083f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1084f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1085f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
10866cd98798SBarry Smith       } else if (dof == 6) {
10876cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
10886cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
10896cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
10906cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1091ed8eea03SBarry Smith       } else if (dof == 7) {
1092ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
1093ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
1094ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
1095ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
109666ed3db0SBarry Smith       } else if (dof == 8) {
109766ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
109866ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
109966ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
110066ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
11012b692628SMatthew Knepley       } else if (dof == 9) {
11022b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
11032b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
11042b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
11052b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1106b9cda22cSBarry Smith       } else if (dof == 10) {
1107b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
1108b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
1109b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
1110b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1111dbdd7285SBarry Smith       } else if (dof == 11) {
1112dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
1113dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
1114dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
1115dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
11162f7816d4SBarry Smith       } else if (dof == 16) {
11172f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
11182f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
11192f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
11202f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1121ed1c418cSBarry Smith       } else if (dof == 18) {
1122ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
1123ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
1124ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
1125ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
112682b1193eSBarry Smith       } else {
11276861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
11286861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
11296861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
11306861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
113182b1193eSBarry Smith       }
1132fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
11339566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1134fdc842d1SBarry Smith #endif
11359566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
11369566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1137f4a53059SBarry Smith     } else {
1138f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
1139feb9fe23SJed Brown       Mat_MPIMAIJ *b;
1140f4a53059SBarry Smith       IS           from, to;
1141f4a53059SBarry Smith       Vec          gvec;
1142f4a53059SBarry Smith 
11439566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATMPIMAIJ));
114426fbe8dcSKarl Rupp 
11450298fd71SBarry Smith       B->ops->setup   = NULL;
1146d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
11470fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
114826fbe8dcSKarl Rupp 
1149b9b97703SBarry Smith       b      = (Mat_MPIMAIJ *)B->data;
1150b9b97703SBarry Smith       b->dof = dof;
1151b9b97703SBarry Smith       b->A   = A;
115226fbe8dcSKarl Rupp 
11539566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
11549566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
11554cb9d9b8SBarry Smith 
11569566063dSJacob Faibussowitsch       PetscCall(VecGetSize(mpiaij->lvec, &n));
11579566063dSJacob Faibussowitsch       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
11589566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
11599566063dSJacob Faibussowitsch       PetscCall(VecSetBlockSize(b->w, dof));
11609566063dSJacob Faibussowitsch       PetscCall(VecSetType(b->w, VECSEQ));
1161f4a53059SBarry Smith 
1162f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
11639566063dSJacob Faibussowitsch       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
11649566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
1165f4a53059SBarry Smith 
1166f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
11679566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
1168f4a53059SBarry Smith 
1169f4a53059SBarry Smith       /* generate the scatter context */
11709566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
1171f4a53059SBarry Smith 
11729566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&from));
11739566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&to));
11749566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&gvec));
1175f4a53059SBarry Smith 
1176f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
11774cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
11784cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
11794cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
118026fbe8dcSKarl Rupp 
1181fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
11829566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1183fdc842d1SBarry Smith #endif
11849566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
11859566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1186f4a53059SBarry Smith     }
11877dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
1188ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
11899566063dSJacob Faibussowitsch     PetscCall(MatSetUp(B));
1190fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
1191fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
1192fdc842d1SBarry Smith     {
1193fdc842d1SBarry Smith       PetscBool flg;
1194fdc842d1SBarry Smith       if (convert) {
11959566063dSJacob Faibussowitsch         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
119648a46eb9SPierre Jolivet         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1197fdc842d1SBarry Smith       }
1198fdc842d1SBarry Smith     }
1199fdc842d1SBarry Smith #endif
1200cd3bbe55SBarry Smith     *maij = B;
1201d72c5c08SBarry Smith   }
12023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
120382b1193eSBarry Smith }
1204