xref: /petsc/src/mat/impls/maij/maij.c (revision 5ff6d247539c86491dc822dc7e845e819e6cc4a3)
1 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/utils/freespace.h>
3 
4 /*@
5   MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
6 
7   Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
8 
9   Input Parameter:
10 . A - the `MATMAIJ` matrix
11 
12   Output Parameter:
13 . B - the `MATAIJ` matrix
14 
15   Level: advanced
16 
17   Note:
18   The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
19 
20 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
21 @*/
MatMAIJGetAIJ(Mat A,Mat * B)22 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
23 {
24   PetscBool ismpimaij, isseqmaij;
25 
26   PetscFunctionBegin;
27   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
28   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29   if (ismpimaij) {
30     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
31 
32     *B = b->A;
33   } else if (isseqmaij) {
34     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
35 
36     *B = b->AIJ;
37   } else {
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 /*@
44   MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
45 
46   Logically Collective
47 
48   Input Parameters:
49 + A   - the `MATMAIJ` matrix
50 - dof - the block size for the new matrix
51 
52   Output Parameter:
53 . B - the new `MATMAIJ` matrix
54 
55   Level: advanced
56 
57 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
58 @*/
MatMAIJRedimension(Mat A,PetscInt dof,Mat * B)59 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
60 {
61   Mat Aij = NULL;
62 
63   PetscFunctionBegin;
64   PetscValidLogicalCollectiveInt(A, dof, 2);
65   PetscCall(MatMAIJGetAIJ(A, &Aij));
66   PetscCall(MatCreateMAIJ(Aij, dof, B));
67   PetscFunctionReturn(PETSC_SUCCESS);
68 }
69 
MatDestroy_SeqMAIJ(Mat A)70 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
71 {
72   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
73 
74   PetscFunctionBegin;
75   PetscCall(MatDestroy(&b->AIJ));
76   PetscCall(PetscFree(A->data));
77   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
79   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
/* MAIJ matrices cannot be set up generically; they must be built with MatCreateMAIJ() */
static PetscErrorCode MatSetUp_MAIJ(Mat A)
{
  PetscFunctionBegin;
  SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
}
88 
MatView_SeqMAIJ(Mat A,PetscViewer viewer)89 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
95   PetscCall(MatView(B, viewer));
96   PetscCall(MatDestroy(&B));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
MatView_MPIMAIJ(Mat A,PetscViewer viewer)100 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
101 {
102   Mat B;
103 
104   PetscFunctionBegin;
105   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
106   PetscCall(MatView(B, viewer));
107   PetscCall(MatDestroy(&B));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
/* Free an MPIMAIJ matrix: local/off-process parts, scatter, work vector, and composed methods */
static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));        /* local (on-process) part */
  PetscCall(MatDestroy(&b->OAIJ));       /* off-process part, applied to scattered ghost values */
  PetscCall(MatDestroy(&b->A));          /* underlying AIJ matrix the MAIJ wraps */
  PetscCall(VecScatterDestroy(&b->ctx)); /* scatter used to gather ghost values into w */
  PetscCall(VecDestroy(&b->w));          /* work vector holding scattered values */
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
128 
129 /*MC
130   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
131   multicomponent problems, interpolating or restricting each component the same way independently.
132   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
133 
134   Operations provided:
135 .vb
136     MatMult()
137     MatMultTranspose()
138     MatMultAdd()
139     MatMultTransposeAdd()
140 .ve
141 
142   Level: advanced
143 
144 .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
145 M*/
146 
MatCreate_MAIJ(Mat A)147 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
148 {
149   Mat_MPIMAIJ *b;
150   PetscMPIInt  size;
151 
152   PetscFunctionBegin;
153   PetscCall(PetscNew(&b));
154   A->data = (void *)b;
155 
156   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
157 
158   A->ops->setup = MatSetUp_MAIJ;
159 
160   b->AIJ  = NULL;
161   b->dof  = 0;
162   b->OAIJ = NULL;
163   b->ctx  = NULL;
164   b->w    = NULL;
165   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
166   if (size == 1) {
167     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
168   } else {
169     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
170   }
171   A->preallocated = PETSC_TRUE;
172   A->assembled    = PETSC_TRUE;
173   PetscFunctionReturn(PETSC_SUCCESS);
174 }
175 
176 #if PetscHasAttribute(always_inline)
177   #define PETSC_FORCE_INLINE __attribute__((always_inline))
178 #else
179   #define PETSC_FORCE_INLINE
180 #endif
181 
182 #if defined(__clang__)
183   #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
184 #else
185   #define PETSC_PRAGMA_UNROLL
186 #endif
187 
188 enum {
189   MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
190 };
191 
192 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
193 // keyword into account for these...
// Shared kernel for MatMult (yy == NULL) and MatMultAdd (yy != NULL) on SeqMAIJ matrices.
// N is the block size (dof); callers pass a literal N so the inner k-loops can be fully
// unrolled. Logical row dof*i+k of the MAIJ matrix uses compressed AIJ row i, component k.
PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
  const Mat          baij       = b->AIJ;
  const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m          = baij->rmap->n;
  const PetscInt     nz         = a->nz;
  const PetscInt    *idx        = a->j;
  const PetscInt    *ii         = a->i;
  const PetscScalar *v          = a->a;
  PetscInt           nonzerorow = 0;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  // for the add variant, start the accumulation from yy (copy unless zz aliases it)
  if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecGetArray(zz, &z));
  } else {
    // plain mult overwrites zz entirely, so a write-only access suffices
    PetscCall(VecGetArrayWrite(zz, &z));
  }

  for (PetscInt i = 0; i < m; ++i) {
    PetscInt       jrow = ii[i];
    const PetscInt n    = ii[i + 1] - jrow;
    // leave a line so clang-format does not align these decls
    PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

    nonzerorow += n > 0;
    for (PetscInt j = 0; j < n; ++j, ++jrow) {
      const PetscScalar v_jrow     = v[jrow];
      const PetscInt    N_idx_jrow = N * idx[jrow];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
    }

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) {
      const PetscInt z_idx = N * i + k;

      if (mult_add) {
        z[z_idx] += sum[k];
      } else {
        z[z_idx] = sum[k];
      }
    }
  }
  // 2 flops per stored entry per component; plain mult deducts one flop per
  // component for each nonzero row (the first store is not an add)
  PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
  PetscCall(VecRestoreArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecRestoreArray(zz, &z));
  } else {
    PetscCall(VecRestoreArrayWrite(zz, &z));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
254 
// Shared kernel for MatMultTranspose (yy == NULL) and MatMultTransposeAdd (yy != NULL)
// on SeqMAIJ matrices. Iterates over rows of the compressed AIJ matrix, scattering
// each row's contribution into z (column-wise accumulation), so z is always read-write.
PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
  const Mat          baij     = b->AIJ;
  const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m        = baij->rmap->n;
  const PetscInt     nz       = a->nz;
  const PetscInt    *a_j      = a->j;
  const PetscInt    *a_i      = a->i;
  const PetscScalar *a_a      = a->a;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add) {
    // accumulate on top of yy (copy unless zz aliases it)
    if (yy != zz) PetscCall(VecCopy(yy, zz));
  } else {
    // plain transpose-mult accumulates into a zeroed result
    PetscCall(VecSet(zz, 0.0));
  }
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &z));

  for (PetscInt i = 0; i < m; i++) {
    const PetscInt     a_ii = a_i[i];
    // PetscSafePointerPlusOffset avoids NULL + 0 arithmetic (UB) for empty matrices
    const PetscInt    *idx  = PetscSafePointerPlusOffset(a_j, a_ii);
    const PetscScalar *v    = PetscSafePointerPlusOffset(a_a, a_ii);
    const PetscInt     n    = a_i[i + 1] - a_ii;
    PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
    for (PetscInt j = 0; j < n; ++j) {
      const PetscInt    N_idx_j = N * idx[j];
      const PetscScalar v_j     = v[j];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
    }
  }

  PetscCall(PetscLogFlops(2 * N * nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
302 
// Instantiate MatMult/MatMultTranspose/MatMultAdd/MatMultTransposeAdd for a fixed
// block size N by calling the inlined templates above with a literal N, so the
// per-component loops are unrolled at compile time.
#define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
  static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  }

// Block sizes with dedicated unrolled kernels; other dof values use the
// run-time *_SeqMAIJ_N routines below
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)

#undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
343 
MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)344 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
345 {
346   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
347   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
348   const PetscScalar *x, *v;
349   PetscScalar       *y, *sums;
350   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
351   PetscInt           n, i, jrow, j, dof = b->dof, k;
352 
353   PetscFunctionBegin;
354   PetscCall(VecGetArrayRead(xx, &x));
355   PetscCall(VecSet(yy, 0.0));
356   PetscCall(VecGetArray(yy, &y));
357   idx = a->j;
358   v   = a->a;
359   ii  = a->i;
360 
361   for (i = 0; i < m; i++) {
362     jrow = ii[i];
363     n    = ii[i + 1] - jrow;
364     sums = y + dof * i;
365     for (j = 0; j < n; j++) {
366       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
367       jrow++;
368     }
369   }
370 
371   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
372   PetscCall(VecRestoreArrayRead(xx, &x));
373   PetscCall(VecRestoreArray(yy, &y));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)377 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
378 {
379   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
380   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
381   const PetscScalar *x, *v;
382   PetscScalar       *y, *sums;
383   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
384   PetscInt           n, i, jrow, j, dof = b->dof, k;
385 
386   PetscFunctionBegin;
387   if (yy != zz) PetscCall(VecCopy(yy, zz));
388   PetscCall(VecGetArrayRead(xx, &x));
389   PetscCall(VecGetArray(zz, &y));
390   idx = a->j;
391   v   = a->a;
392   ii  = a->i;
393 
394   for (i = 0; i < m; i++) {
395     jrow = ii[i];
396     n    = ii[i + 1] - jrow;
397     sums = y + dof * i;
398     for (j = 0; j < n; j++) {
399       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
400       jrow++;
401     }
402   }
403 
404   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
405   PetscCall(VecRestoreArrayRead(xx, &x));
406   PetscCall(VecRestoreArray(zz, &y));
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409 
MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)410 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
411 {
412   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
413   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
414   const PetscScalar *x, *v, *alpha;
415   PetscScalar       *y;
416   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
417   PetscInt           n, i, k;
418 
419   PetscFunctionBegin;
420   PetscCall(VecGetArrayRead(xx, &x));
421   PetscCall(VecSet(yy, 0.0));
422   PetscCall(VecGetArray(yy, &y));
423   for (i = 0; i < m; i++) {
424     idx   = PetscSafePointerPlusOffset(a->j, a->i[i]);
425     v     = PetscSafePointerPlusOffset(a->a, a->i[i]);
426     n     = a->i[i + 1] - a->i[i];
427     alpha = x + dof * i;
428     while (n-- > 0) {
429       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
430       idx++;
431       v++;
432     }
433   }
434   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
435   PetscCall(VecRestoreArrayRead(xx, &x));
436   PetscCall(VecRestoreArray(yy, &y));
437   PetscFunctionReturn(PETSC_SUCCESS);
438 }
439 
MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)440 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
441 {
442   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
443   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
444   const PetscScalar *x, *v, *alpha;
445   PetscScalar       *y;
446   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
447   PetscInt           n, i, k;
448 
449   PetscFunctionBegin;
450   if (yy != zz) PetscCall(VecCopy(yy, zz));
451   PetscCall(VecGetArrayRead(xx, &x));
452   PetscCall(VecGetArray(zz, &y));
453   for (i = 0; i < m; i++) {
454     idx   = a->j + a->i[i];
455     v     = a->a + a->i[i];
456     n     = a->i[i + 1] - a->i[i];
457     alpha = x + dof * i;
458     while (n-- > 0) {
459       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
460       idx++;
461       v++;
462     }
463   }
464   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
465   PetscCall(VecRestoreArrayRead(xx, &x));
466   PetscCall(VecRestoreArray(zz, &y));
467   PetscFunctionReturn(PETSC_SUCCESS);
468 }
469 
/* yy = A xx for MPIMAIJ: overlap the ghost-value scatter with the local multiply */
static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); /* local part while the scatter is in flight */
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); /* add the off-process contribution */
  PetscFunctionReturn(PETSC_SUCCESS);
}
482 
/* yy = A^T xx for MPIMAIJ: compute both parts, then reverse-scatter-add the off-process piece */
static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); /* off-process contribution into work vector */
  PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));     /* local contribution into yy */
  /* reverse scatter with ADD_VALUES merges w into the owning processes' entries of yy */
  PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
494 
/* zz = yy + A xx for MPIMAIJ: overlap the ghost-value scatter with the local multiply-add */
static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); /* local part while the scatter is in flight */
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); /* add the off-process contribution */
  PetscFunctionReturn(PETSC_SUCCESS);
}
507 
/* zz = yy + A^T xx for MPIMAIJ: compute both parts, then reverse-scatter-add the off-process piece */
static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));    /* off-process contribution into work vector */
  PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); /* zz = yy + local contribution */
  /* reverse scatter with ADD_VALUES merges w into the owning processes' entries of zz */
  PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
519 
MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)520 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
521 {
522   Mat_Product *product = C->product;
523 
524   PetscFunctionBegin;
525   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
526   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
/*
  Configure an MPIAIJ/MPIMAIJ matrix product (only PtAP is supported): validate
  compatible layouts, pick the algorithm (default "nonscalable", overridable via
  -mat_product_algorithm), and either install the MAIJ-specific symbolic routine
  (allatonce variants) or fall back to converting P to MPIAIJ.
*/
static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  PetscBool    flg     = PETSC_FALSE;
  Mat          A = product->A, P = product->B;
  PetscInt     alg = 1; /* set default algorithm ("nonscalable") */
#if !defined(PETSC_HAVE_HYPRE)
  const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
  PetscInt    nalg        = 4;
#else
  const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
  PetscInt    nalg        = 5;
#endif

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

  /* PtAP */
  /* Check matrix local sizes */
  PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
  PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

  /* Set the default algorithm */
  PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
  PetscOptionsEnd();

  /* Both allatonce variants dispatch through the same symbolic entry point */
  PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
  PetscCall(PetscInfo(A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
  PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
  PetscCall(MatProductSetFromOptions(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
582 
/*
  Numeric phase of C = P^T * A * P where P is a SeqMAIJ matrix.

  P is stored compressed as its underlying SeqAIJ matrix (one row per block);
  ppdof is the block size, so logical column c of A maps to compressed P row
  c/ppdof with component offset c%ppdof. C's sparsity must already have been
  computed by MatPtAPSymbolic_SeqAIJ_SeqMAIJ().
*/
static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
{
  /* This routine requires testing -- first draft only */
  Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
  Mat              P  = pp->AIJ;
  Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
  const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
  const PetscInt  *ci = c->i, *cj = c->j, *cjj;
  const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
  PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
  const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
  MatScalar       *ca = c->a, *caj, *apa;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

  /* Clear old values in C */
  PetscCall(PetscArrayzero(ca, ci[cm]));

  for (i = 0; i < am; i++) {
    /* Form sparse row of A*P: apa holds dense values, apj the nonzero columns,
       apjdense marks which columns have been seen for this row */
    anzi  = ai[i + 1] - ai[i];
    apnzj = 0;
    for (j = 0; j < anzi; j++) {
      /* Get offset within block of P */
      pshift = *aj % ppdof;
      /* Get block row of P */
      prow = *aj++ / ppdof; /* integer division */
      pnzj = pi[prow + 1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k = 0; k < pnzj; k++) {
        /* Expand the compressed column back to the logical column of C */
        poffset = pjj[k] * ppdof + pshift;
        if (!apjdense[poffset]) {
          apjdense[poffset] = -1;
          apj[apnzj++]      = poffset;
        }
        apa[poffset] += (*aa) * paj[k];
      }
      PetscCall(PetscLogFlops(2.0 * pnzj));
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    PetscCall(PetscSortInt(apnzj, apj));

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    prow    = i / ppdof; /* integer division */
    pshift  = i % ppdof;
    poffset = pi[prow];
    pnzi    = pi[prow + 1] - poffset;
    /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
    pJ = pj + poffset;
    pA = pa + poffset;
    for (j = 0; j < pnzi; j++) {
      crow = (*pJ) * ppdof + pshift;
      cjj  = cj + ci[crow];
      caj  = ca + ci[crow];
      pJ++;
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k = 0, nextap = 0; nextap < apnzj; k++) {
        if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
      }
      PetscCall(PetscLogFlops(2.0 * apnzj));
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j = 0; j < apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(apa, apj, apjdense));
  PetscFunctionReturn(PETSC_SUCCESS);
}
667 
/*
  Symbolic phase of C = P^T * A * P where P is a SeqMAIJ matrix: determines the
  nonzero pattern (ci, cj) of C and builds C with zeroed values. Each compressed
  row i of P^T is processed once per component dof, producing rows
  i*ppdof .. i*ppdof+ppdof-1 of C. Dense work arrays mark visited columns so
  each column of a row is recorded once; a PetscFreeSpace list accumulates the
  column indices since the final row lengths are not known in advance.
*/
static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
{
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
  Mat                P  = pp->AIJ;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
  PetscInt          *pti, *ptj, *ptJ;
  PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
  const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
  PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
  MatScalar         *ca;
  const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

  cn = pn * ppdof; /* C is square of logical (uncompressed) dimension pn*dof */
  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(cn + 1, &ci));
  ci[0] = 0;

  /* Work arrays for rows of P^T*A */
  PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
  PetscCall(PetscArrayzero(ptadenserow, an));
  PetscCall(PetscArrayzero(denserow, cn));

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i = 0; i < pn; i++) {
    ptnzi = pti[i + 1] - pti[i];
    ptJ   = ptj + pti[i];
    for (dof = 0; dof < ppdof; dof++) {
      ptanzi = 0;
      /* Determine symbolic row of PtA: */
      for (j = 0; j < ptnzi; j++) {
        /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
        arow = ptJ[j] * ppdof + dof;
        /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
        anzj = ai[arow + 1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k = 0; k < anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j = 0; j < ptanzi; j++) {
        /* Get offset within block of P */
        pshift = *ptaj % ppdof;
        /* Get block row of P */
        prow = (*ptaj++) / ppdof; /* integer division */
        /* P has same number of nonzeros per row as the compressed form */
        pnzj = pi[prow + 1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k = 0; k < pnzj; k++) {
          /* Locations in C are shifted by the offset within the block */
          /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
          if (!denserow[pjj[k] * ppdof + pshift]) {
            denserow[pjj[k] * ppdof + pshift] = -1;
            sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
          }
        }
      }

      /* sort sparserow */
      PetscCall(PetscSortInt(cnzi, sparserow));

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

      /* Copy data into free space, and zero out denserows */
      PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

      current_space->array += cnzi;
      current_space->local_used += cnzi;
      current_space->local_remaining -= cnzi;

      for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
      for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /*        For now, we will recompute what is needed. */
      ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscMalloc1(ci[cn], &cj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
  PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

  /* Allocate space for ca */
  PetscCall(PetscCalloc1(ci[cn], &ca));

  /* put together the new matrix */
  PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
  PetscCall(MatSetBlockSize(C, pp->dof));

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ *)C->data;
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
  C->ops->productnumeric = MatProductNumeric_PtAP;

  /* Clean up. */
  PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
793 
MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)794 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
795 {
796   Mat_Product *product = C->product;
797   Mat          A = product->A, P = product->B;
798 
799   PetscFunctionBegin;
800   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
801   PetscFunctionReturn(PETSC_SUCCESS);
802 }
803 
804 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
805 
MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)806 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
807 {
808   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
809 
810   PetscFunctionBegin;
811   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
815 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
816 
MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)817 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
818 {
819   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
820 
821   PetscFunctionBegin;
822   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
823   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
824   PetscFunctionReturn(PETSC_SUCCESS);
825 }
826 
827 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
828 
MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)829 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
830 {
831   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
832 
833   PetscFunctionBegin;
834   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
835   PetscFunctionReturn(PETSC_SUCCESS);
836 }
837 
838 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
839 
MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)840 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
841 {
842   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
843 
844   PetscFunctionBegin;
845   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
846   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)850 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
851 {
852   Mat_Product *product = C->product;
853   Mat          A = product->A, P = product->B;
854   PetscBool    flg;
855 
856   PetscFunctionBegin;
857   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
858   if (flg) {
859     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
860     C->ops->productnumeric = MatProductNumeric_PtAP;
861     PetscFunctionReturn(PETSC_SUCCESS);
862   }
863 
864   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
865   PetscCheck(flg, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
866   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
867   C->ops->productnumeric = MatProductNumeric_PtAP;
868   PetscFunctionReturn(PETSC_SUCCESS);
869 }
870 
MatConvert_SeqMAIJ_SeqAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)871 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
872 {
873   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
874   Mat          a   = b->AIJ, B;
875   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
876   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
877   PetscInt    *cols;
878   PetscScalar *vals;
879 
880   PetscFunctionBegin;
881   PetscCall(MatGetSize(a, &m, &n));
882   PetscCall(PetscMalloc1(dof * m, &ilen));
883   for (i = 0; i < m; i++) {
884     nmax = PetscMax(nmax, aij->ilen[i]);
885     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
886   }
887   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
888   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
889   PetscCall(MatSetType(B, newtype));
890   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
891   PetscCall(PetscFree(ilen));
892   PetscCall(PetscMalloc1(nmax, &icols));
893   ii = 0;
894   for (i = 0; i < m; i++) {
895     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
896     for (j = 0; j < dof; j++) {
897       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
898       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
899       ii++;
900     }
901     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
902   }
903   PetscCall(PetscFree(icols));
904   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
905   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
906 
907   if (reuse == MAT_INPLACE_MATRIX) {
908     PetscCall(MatHeaderReplace(A, &B));
909   } else {
910     *newmat = B;
911   }
912   PetscFunctionReturn(PETSC_SUCCESS);
913 }
914 
915 #include <../src/mat/impls/aij/mpi/mpiaij.h>
916 
/* Convert a parallel MAIJ matrix to an explicit MPIAIJ matrix: each row/column
   of the underlying base matrix expands into dof interleaved rows/columns.
   The diagonal and off-diagonal sequential blocks are copied separately. */
PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
  Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; /* base diagonal block */
  Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;   /* base off-diagonal block */
  Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
  Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
  Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
  PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
  PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
  PetscInt     rstart, cstart, *garray, ii, k;
  PetscScalar *vals, *ovals;

  PetscFunctionBegin;
  /* Preallocation: each of the dof rows expanded from base row i has the same
     diagonal-block (dnz) and off-diagonal-block (onz) nonzero counts as row i */
  PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
  for (i = 0; i < A->rmap->n / dof; i++) {
    nmax  = PetscMax(nmax, AIJ->ilen[i]);
    onmax = PetscMax(onmax, OAIJ->ilen[i]));
    for (j = 0; j < dof; j++) {
      dnz[dof * i + j] = AIJ->ilen[i];
      onz[dof * i + j] = OAIJ->ilen[i];
    }
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
  PetscCall(MatSetBlockSize(B, dof));
  PetscCall(PetscFree2(dnz, onz));

  /* Scratch column buffers sized to the widest diagonal/off-diagonal base row */
  PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
  /* Global row/column offsets of this process's block, scaled by dof */
  rstart = dof * maij->A->rmap->rstart;
  cstart = dof * maij->A->cmap->rstart;
  garray = mpiaij->garray; /* maps local off-diagonal column index -> global base column */

  ii = rstart; /* global index of the expanded row being written */
  for (i = 0; i < A->rmap->n / dof; i++) {
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
    for (j = 0; j < dof; j++) {
      /* base column c, component j -> global column dof*c + j (plus cstart for the local block) */
      for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
      for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
  }
  PetscCall(PetscFree2(icols, oicols));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* Temporarily present a reference count of 1 to MatHeaderReplace and restore
       it afterwards — presumably so outstanding references to A held elsewhere
       survive the in-place header swap (NOTE(review): confirm against MatHeaderReplace) */
    PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
    ((PetscObject)A)->refct = 1;

    PetscCall(MatHeaderReplace(A, &B));

    ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
983 
MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat * newmat)984 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
985 {
986   Mat A;
987 
988   PetscFunctionBegin;
989   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
990   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
991   PetscCall(MatDestroy(&A));
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * submat[])995 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
996 {
997   Mat A;
998 
999   PetscFunctionBegin;
1000   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1001   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
1002   PetscCall(MatDestroy(&A));
1003   PetscFunctionReturn(PETSC_SUCCESS);
1004 }
1005 
1006 /*@
1007   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1008   operations for multicomponent problems.  It interpolates each component the same
1009   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
1010   and `MATMPIAIJ` for distributed matrices.
1011 
1012   Collective
1013 
1014   Input Parameters:
1015 + A   - the `MATAIJ` matrix describing the action on blocks
1016 - dof - the block size (number of components per node)
1017 
1018   Output Parameter:
1019 . maij - the new `MATMAIJ` matrix
1020 
1021   Level: advanced
1022 
1023 .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1024 @*/
PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
{
  PetscInt  n;
  Mat       B;
  PetscBool flg;
#if defined(PETSC_HAVE_CUDA)
  /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
  PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
#endif

  PetscFunctionBegin;
  /* a negative dof is only an internal flag (see the CUDA hack above); the block size is |dof| */
  dof = PetscAbs(dof);
  PetscCall(PetscObjectReference((PetscObject)A));

  /* dof == 1: the blockwise action is A itself; return it with the extra reference taken above */
  if (dof == 1) *maij = A;
  else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* propagate vec type */
    PetscCall(MatSetVecType(B, A->defaultvectype));
    /* local and global sizes of the MAIJ matrix are those of A scaled by dof */
    PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
    PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));

    /* the MAIJ matrix stores no entries of its own, it only references A, so it is born assembled */
    B->assembled = PETSC_TRUE;

    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
    if (flg) {
      /* sequential case: wrap A in a MATSEQMAIJ */
      Mat_SeqMAIJ *b;

      PetscCall(MatSetType(B, MATSEQMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_SeqMAIJ;
      B->ops->view    = MatView_SeqMAIJ;

      b      = (Mat_SeqMAIJ *)B->data;
      b->dof = dof;
      b->AIJ = A;

      /* hand-unrolled mult kernels exist for common small dof values;
         any other dof falls back to the general _N kernels */
      if (dof == 2) {
        B->ops->mult             = MatMult_SeqMAIJ_2;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
      } else if (dof == 3) {
        B->ops->mult             = MatMult_SeqMAIJ_3;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
      } else if (dof == 4) {
        B->ops->mult             = MatMult_SeqMAIJ_4;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
      } else if (dof == 5) {
        B->ops->mult             = MatMult_SeqMAIJ_5;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
      } else if (dof == 6) {
        B->ops->mult             = MatMult_SeqMAIJ_6;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
      } else if (dof == 7) {
        B->ops->mult             = MatMult_SeqMAIJ_7;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
      } else if (dof == 8) {
        B->ops->mult             = MatMult_SeqMAIJ_8;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
      } else if (dof == 9) {
        B->ops->mult             = MatMult_SeqMAIJ_9;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
      } else if (dof == 10) {
        B->ops->mult             = MatMult_SeqMAIJ_10;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
      } else if (dof == 11) {
        B->ops->mult             = MatMult_SeqMAIJ_11;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
      } else if (dof == 16) {
        B->ops->mult             = MatMult_SeqMAIJ_16;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
      } else if (dof == 18) {
        B->ops->mult             = MatMult_SeqMAIJ_18;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
      } else {
        B->ops->mult             = MatMult_SeqMAIJ_N;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
      }
#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
    } else {
      /* parallel case: wrap the diagonal and off-diagonal blocks of the MPIAIJ
         matrix in sequential MAIJ matrices and build the ghost-value scatter
         for the dof-expanded column space */
      Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
      Mat_MPIMAIJ *b;
      IS           from, to;
      Vec          gvec;

      PetscCall(MatSetType(B, MATMPIMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_MPIMAIJ;
      B->ops->view    = MatView_MPIMAIJ;

      b      = (Mat_MPIMAIJ *)B->data;
      b->dof = dof;
      b->A   = A;

      /* negative dof tells the recursive calls to skip the CUDA conversion hack below */
      PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
      PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

      /* work vector holding the dof-expanded ghost values (n base ghosts -> n*dof entries) */
      PetscCall(VecGetSize(mpiaij->lvec, &n));
      PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
      PetscCall(VecSetSizes(b->w, n * dof, n * dof));
      PetscCall(VecSetBlockSize(b->w, dof));
      PetscCall(VecSetType(b->w, VECSEQ));

      /* create two temporary Index sets for build scatter gather */
      PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

      /* create temporary global vector to generate scatter context */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

      /* generate the scatter context */
      PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

      PetscCall(ISDestroy(&from));
      PetscCall(ISDestroy(&to));
      PetscCall(VecDestroy(&gvec));

      B->ops->mult             = MatMult_MPIMAIJ_dof;
      B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
      B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
      B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
    }
    /* submatrix extraction is implemented by converting to explicit AIJ first */
    B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
    B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
    PetscCall(MatSetUp(B));
#if defined(PETSC_HAVE_CUDA)
    /* temporary until we have CUDA implementation of MAIJ */
    {
      PetscBool flg;
      if (convert) {
        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
        if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
      }
    }
#endif
    *maij = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1204