xref: /petsc/src/mat/impls/maij/maij.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: `MatCreateMAIJ()`
39 @*/
40 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) {
41   PetscBool ismpimaij, isseqmaij;
42 
43   PetscFunctionBegin;
44   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
45   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
46   if (ismpimaij) {
47     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
48 
49     *B = b->A;
50   } else if (isseqmaij) {
51     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
52 
53     *B = b->AIJ;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 /*@
61    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
62 
63    Logically Collective
64 
65    Input Parameters:
66 +  A - the MAIJ matrix
67 -  dof - the block size for the new matrix
68 
69    Output Parameter:
70 .  B - the new MAIJ matrix
71 
72    Level: advanced
73 
74 .seealso: `MatCreateMAIJ()`
75 @*/
76 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) {
77   Mat Aij = NULL;
78 
79   PetscFunctionBegin;
80   PetscValidLogicalCollectiveInt(A, dof, 2);
81   PetscCall(MatMAIJGetAIJ(A, &Aij));
82   PetscCall(MatCreateMAIJ(Aij, dof, B));
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) {
87   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
88 
89   PetscFunctionBegin;
90   PetscCall(MatDestroy(&b->AIJ));
91   PetscCall(PetscFree(A->data));
92   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
93   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
94   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
95   PetscFunctionReturn(0);
96 }
97 
98 PetscErrorCode MatSetUp_MAIJ(Mat A) {
99   PetscFunctionBegin;
100   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
101 }
102 
103 PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) {
104   Mat B;
105 
106   PetscFunctionBegin;
107   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
108   PetscCall(MatView(B, viewer));
109   PetscCall(MatDestroy(&B));
110   PetscFunctionReturn(0);
111 }
112 
113 PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) {
114   Mat B;
115 
116   PetscFunctionBegin;
117   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
118   PetscCall(MatView(B, viewer));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(0);
121 }
122 
123 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) {
124   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
125 
126   PetscFunctionBegin;
127   PetscCall(MatDestroy(&b->AIJ));
128   PetscCall(MatDestroy(&b->OAIJ));
129   PetscCall(MatDestroy(&b->A));
130   PetscCall(VecScatterDestroy(&b->ctx));
131   PetscCall(VecDestroy(&b->w));
132   PetscCall(PetscFree(A->data));
133   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
134   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
135   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
136   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
137   PetscFunctionReturn(0);
138 }
139 
140 /*MC
141   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
142   multicomponent problems, interpolating or restricting each component the same way independently.
143   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
144 
145   Operations provided:
146 .vb
147     MatMult
148     MatMultTranspose
149     MatMultAdd
150     MatMultTransposeAdd
151 .ve
152 
153   Level: advanced
154 
155 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
156 M*/
157 
158 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) {
159   Mat_MPIMAIJ *b;
160   PetscMPIInt  size;
161 
162   PetscFunctionBegin;
163   PetscCall(PetscNewLog(A, &b));
164   A->data = (void *)b;
165 
166   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
167 
168   A->ops->setup = MatSetUp_MAIJ;
169 
170   b->AIJ  = NULL;
171   b->dof  = 0;
172   b->OAIJ = NULL;
173   b->ctx  = NULL;
174   b->w    = NULL;
175   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
176   if (size == 1) {
177     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
178   } else {
179     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
180   }
181   A->preallocated = PETSC_TRUE;
182   A->assembled    = PETSC_TRUE;
183   PetscFunctionReturn(0);
184 }
185 
186 /* --------------------------------------------------------------------------------------*/
187 PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
188   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
189   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
190   const PetscScalar *x, *v;
191   PetscScalar       *y, sum1, sum2;
192   PetscInt           nonzerorow = 0, n, i, jrow, j;
193   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
194 
195   PetscFunctionBegin;
196   PetscCall(VecGetArrayRead(xx, &x));
197   PetscCall(VecGetArray(yy, &y));
198   idx = a->j;
199   v   = a->a;
200   ii  = a->i;
201 
202   for (i = 0; i < m; i++) {
203     jrow = ii[i];
204     n    = ii[i + 1] - jrow;
205     sum1 = 0.0;
206     sum2 = 0.0;
207 
208     nonzerorow += (n > 0);
209     for (j = 0; j < n; j++) {
210       sum1 += v[jrow] * x[2 * idx[jrow]];
211       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
212       jrow++;
213     }
214     y[2 * i]     = sum1;
215     y[2 * i + 1] = sum2;
216   }
217 
218   PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow));
219   PetscCall(VecRestoreArrayRead(xx, &x));
220   PetscCall(VecRestoreArray(yy, &y));
221   PetscFunctionReturn(0);
222 }
223 
224 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
225   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
226   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
227   const PetscScalar *x, *v;
228   PetscScalar       *y, alpha1, alpha2;
229   PetscInt           n, i;
230   const PetscInt     m = b->AIJ->rmap->n, *idx;
231 
232   PetscFunctionBegin;
233   PetscCall(VecSet(yy, 0.0));
234   PetscCall(VecGetArrayRead(xx, &x));
235   PetscCall(VecGetArray(yy, &y));
236 
237   for (i = 0; i < m; i++) {
238     idx    = a->j + a->i[i];
239     v      = a->a + a->i[i];
240     n      = a->i[i + 1] - a->i[i];
241     alpha1 = x[2 * i];
242     alpha2 = x[2 * i + 1];
243     while (n-- > 0) {
244       y[2 * (*idx)] += alpha1 * (*v);
245       y[2 * (*idx) + 1] += alpha2 * (*v);
246       idx++;
247       v++;
248     }
249   }
250   PetscCall(PetscLogFlops(4.0 * a->nz));
251   PetscCall(VecRestoreArrayRead(xx, &x));
252   PetscCall(VecRestoreArray(yy, &y));
253   PetscFunctionReturn(0);
254 }
255 
256 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
257   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
258   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
259   const PetscScalar *x, *v;
260   PetscScalar       *y, sum1, sum2;
261   PetscInt           n, i, jrow, j;
262   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
263 
264   PetscFunctionBegin;
265   if (yy != zz) PetscCall(VecCopy(yy, zz));
266   PetscCall(VecGetArrayRead(xx, &x));
267   PetscCall(VecGetArray(zz, &y));
268   idx = a->j;
269   v   = a->a;
270   ii  = a->i;
271 
272   for (i = 0; i < m; i++) {
273     jrow = ii[i];
274     n    = ii[i + 1] - jrow;
275     sum1 = 0.0;
276     sum2 = 0.0;
277     for (j = 0; j < n; j++) {
278       sum1 += v[jrow] * x[2 * idx[jrow]];
279       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
280       jrow++;
281     }
282     y[2 * i] += sum1;
283     y[2 * i + 1] += sum2;
284   }
285 
286   PetscCall(PetscLogFlops(4.0 * a->nz));
287   PetscCall(VecRestoreArrayRead(xx, &x));
288   PetscCall(VecRestoreArray(zz, &y));
289   PetscFunctionReturn(0);
290 }
291 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
292   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
293   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
294   const PetscScalar *x, *v;
295   PetscScalar       *y, alpha1, alpha2;
296   PetscInt           n, i;
297   const PetscInt     m = b->AIJ->rmap->n, *idx;
298 
299   PetscFunctionBegin;
300   if (yy != zz) PetscCall(VecCopy(yy, zz));
301   PetscCall(VecGetArrayRead(xx, &x));
302   PetscCall(VecGetArray(zz, &y));
303 
304   for (i = 0; i < m; i++) {
305     idx    = a->j + a->i[i];
306     v      = a->a + a->i[i];
307     n      = a->i[i + 1] - a->i[i];
308     alpha1 = x[2 * i];
309     alpha2 = x[2 * i + 1];
310     while (n-- > 0) {
311       y[2 * (*idx)] += alpha1 * (*v);
312       y[2 * (*idx) + 1] += alpha2 * (*v);
313       idx++;
314       v++;
315     }
316   }
317   PetscCall(PetscLogFlops(4.0 * a->nz));
318   PetscCall(VecRestoreArrayRead(xx, &x));
319   PetscCall(VecRestoreArray(zz, &y));
320   PetscFunctionReturn(0);
321 }
322 /* --------------------------------------------------------------------------------------*/
323 PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
324   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
325   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
326   const PetscScalar *x, *v;
327   PetscScalar       *y, sum1, sum2, sum3;
328   PetscInt           nonzerorow = 0, n, i, jrow, j;
329   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
330 
331   PetscFunctionBegin;
332   PetscCall(VecGetArrayRead(xx, &x));
333   PetscCall(VecGetArray(yy, &y));
334   idx = a->j;
335   v   = a->a;
336   ii  = a->i;
337 
338   for (i = 0; i < m; i++) {
339     jrow = ii[i];
340     n    = ii[i + 1] - jrow;
341     sum1 = 0.0;
342     sum2 = 0.0;
343     sum3 = 0.0;
344 
345     nonzerorow += (n > 0);
346     for (j = 0; j < n; j++) {
347       sum1 += v[jrow] * x[3 * idx[jrow]];
348       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
349       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
350       jrow++;
351     }
352     y[3 * i]     = sum1;
353     y[3 * i + 1] = sum2;
354     y[3 * i + 2] = sum3;
355   }
356 
357   PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow));
358   PetscCall(VecRestoreArrayRead(xx, &x));
359   PetscCall(VecRestoreArray(yy, &y));
360   PetscFunctionReturn(0);
361 }
362 
363 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
364   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
365   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
366   const PetscScalar *x, *v;
367   PetscScalar       *y, alpha1, alpha2, alpha3;
368   PetscInt           n, i;
369   const PetscInt     m = b->AIJ->rmap->n, *idx;
370 
371   PetscFunctionBegin;
372   PetscCall(VecSet(yy, 0.0));
373   PetscCall(VecGetArrayRead(xx, &x));
374   PetscCall(VecGetArray(yy, &y));
375 
376   for (i = 0; i < m; i++) {
377     idx    = a->j + a->i[i];
378     v      = a->a + a->i[i];
379     n      = a->i[i + 1] - a->i[i];
380     alpha1 = x[3 * i];
381     alpha2 = x[3 * i + 1];
382     alpha3 = x[3 * i + 2];
383     while (n-- > 0) {
384       y[3 * (*idx)] += alpha1 * (*v);
385       y[3 * (*idx) + 1] += alpha2 * (*v);
386       y[3 * (*idx) + 2] += alpha3 * (*v);
387       idx++;
388       v++;
389     }
390   }
391   PetscCall(PetscLogFlops(6.0 * a->nz));
392   PetscCall(VecRestoreArrayRead(xx, &x));
393   PetscCall(VecRestoreArray(yy, &y));
394   PetscFunctionReturn(0);
395 }
396 
397 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
398   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
399   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
400   const PetscScalar *x, *v;
401   PetscScalar       *y, sum1, sum2, sum3;
402   PetscInt           n, i, jrow, j;
403   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
404 
405   PetscFunctionBegin;
406   if (yy != zz) PetscCall(VecCopy(yy, zz));
407   PetscCall(VecGetArrayRead(xx, &x));
408   PetscCall(VecGetArray(zz, &y));
409   idx = a->j;
410   v   = a->a;
411   ii  = a->i;
412 
413   for (i = 0; i < m; i++) {
414     jrow = ii[i];
415     n    = ii[i + 1] - jrow;
416     sum1 = 0.0;
417     sum2 = 0.0;
418     sum3 = 0.0;
419     for (j = 0; j < n; j++) {
420       sum1 += v[jrow] * x[3 * idx[jrow]];
421       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
422       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
423       jrow++;
424     }
425     y[3 * i] += sum1;
426     y[3 * i + 1] += sum2;
427     y[3 * i + 2] += sum3;
428   }
429 
430   PetscCall(PetscLogFlops(6.0 * a->nz));
431   PetscCall(VecRestoreArrayRead(xx, &x));
432   PetscCall(VecRestoreArray(zz, &y));
433   PetscFunctionReturn(0);
434 }
435 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
436   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
437   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
438   const PetscScalar *x, *v;
439   PetscScalar       *y, alpha1, alpha2, alpha3;
440   PetscInt           n, i;
441   const PetscInt     m = b->AIJ->rmap->n, *idx;
442 
443   PetscFunctionBegin;
444   if (yy != zz) PetscCall(VecCopy(yy, zz));
445   PetscCall(VecGetArrayRead(xx, &x));
446   PetscCall(VecGetArray(zz, &y));
447   for (i = 0; i < m; i++) {
448     idx    = a->j + a->i[i];
449     v      = a->a + a->i[i];
450     n      = a->i[i + 1] - a->i[i];
451     alpha1 = x[3 * i];
452     alpha2 = x[3 * i + 1];
453     alpha3 = x[3 * i + 2];
454     while (n-- > 0) {
455       y[3 * (*idx)] += alpha1 * (*v);
456       y[3 * (*idx) + 1] += alpha2 * (*v);
457       y[3 * (*idx) + 2] += alpha3 * (*v);
458       idx++;
459       v++;
460     }
461   }
462   PetscCall(PetscLogFlops(6.0 * a->nz));
463   PetscCall(VecRestoreArrayRead(xx, &x));
464   PetscCall(VecRestoreArray(zz, &y));
465   PetscFunctionReturn(0);
466 }
467 
468 /* ------------------------------------------------------------------------------*/
469 PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
470   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
471   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
472   const PetscScalar *x, *v;
473   PetscScalar       *y, sum1, sum2, sum3, sum4;
474   PetscInt           nonzerorow = 0, n, i, jrow, j;
475   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
476 
477   PetscFunctionBegin;
478   PetscCall(VecGetArrayRead(xx, &x));
479   PetscCall(VecGetArray(yy, &y));
480   idx = a->j;
481   v   = a->a;
482   ii  = a->i;
483 
484   for (i = 0; i < m; i++) {
485     jrow = ii[i];
486     n    = ii[i + 1] - jrow;
487     sum1 = 0.0;
488     sum2 = 0.0;
489     sum3 = 0.0;
490     sum4 = 0.0;
491     nonzerorow += (n > 0);
492     for (j = 0; j < n; j++) {
493       sum1 += v[jrow] * x[4 * idx[jrow]];
494       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
495       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
496       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
497       jrow++;
498     }
499     y[4 * i]     = sum1;
500     y[4 * i + 1] = sum2;
501     y[4 * i + 2] = sum3;
502     y[4 * i + 3] = sum4;
503   }
504 
505   PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow));
506   PetscCall(VecRestoreArrayRead(xx, &x));
507   PetscCall(VecRestoreArray(yy, &y));
508   PetscFunctionReturn(0);
509 }
510 
511 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
512   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
513   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
514   const PetscScalar *x, *v;
515   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
516   PetscInt           n, i;
517   const PetscInt     m = b->AIJ->rmap->n, *idx;
518 
519   PetscFunctionBegin;
520   PetscCall(VecSet(yy, 0.0));
521   PetscCall(VecGetArrayRead(xx, &x));
522   PetscCall(VecGetArray(yy, &y));
523   for (i = 0; i < m; i++) {
524     idx    = a->j + a->i[i];
525     v      = a->a + a->i[i];
526     n      = a->i[i + 1] - a->i[i];
527     alpha1 = x[4 * i];
528     alpha2 = x[4 * i + 1];
529     alpha3 = x[4 * i + 2];
530     alpha4 = x[4 * i + 3];
531     while (n-- > 0) {
532       y[4 * (*idx)] += alpha1 * (*v);
533       y[4 * (*idx) + 1] += alpha2 * (*v);
534       y[4 * (*idx) + 2] += alpha3 * (*v);
535       y[4 * (*idx) + 3] += alpha4 * (*v);
536       idx++;
537       v++;
538     }
539   }
540   PetscCall(PetscLogFlops(8.0 * a->nz));
541   PetscCall(VecRestoreArrayRead(xx, &x));
542   PetscCall(VecRestoreArray(yy, &y));
543   PetscFunctionReturn(0);
544 }
545 
546 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
547   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
548   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
549   const PetscScalar *x, *v;
550   PetscScalar       *y, sum1, sum2, sum3, sum4;
551   PetscInt           n, i, jrow, j;
552   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
553 
554   PetscFunctionBegin;
555   if (yy != zz) PetscCall(VecCopy(yy, zz));
556   PetscCall(VecGetArrayRead(xx, &x));
557   PetscCall(VecGetArray(zz, &y));
558   idx = a->j;
559   v   = a->a;
560   ii  = a->i;
561 
562   for (i = 0; i < m; i++) {
563     jrow = ii[i];
564     n    = ii[i + 1] - jrow;
565     sum1 = 0.0;
566     sum2 = 0.0;
567     sum3 = 0.0;
568     sum4 = 0.0;
569     for (j = 0; j < n; j++) {
570       sum1 += v[jrow] * x[4 * idx[jrow]];
571       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
572       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
573       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
574       jrow++;
575     }
576     y[4 * i] += sum1;
577     y[4 * i + 1] += sum2;
578     y[4 * i + 2] += sum3;
579     y[4 * i + 3] += sum4;
580   }
581 
582   PetscCall(PetscLogFlops(8.0 * a->nz));
583   PetscCall(VecRestoreArrayRead(xx, &x));
584   PetscCall(VecRestoreArray(zz, &y));
585   PetscFunctionReturn(0);
586 }
587 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
588   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
589   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
590   const PetscScalar *x, *v;
591   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
592   PetscInt           n, i;
593   const PetscInt     m = b->AIJ->rmap->n, *idx;
594 
595   PetscFunctionBegin;
596   if (yy != zz) PetscCall(VecCopy(yy, zz));
597   PetscCall(VecGetArrayRead(xx, &x));
598   PetscCall(VecGetArray(zz, &y));
599 
600   for (i = 0; i < m; i++) {
601     idx    = a->j + a->i[i];
602     v      = a->a + a->i[i];
603     n      = a->i[i + 1] - a->i[i];
604     alpha1 = x[4 * i];
605     alpha2 = x[4 * i + 1];
606     alpha3 = x[4 * i + 2];
607     alpha4 = x[4 * i + 3];
608     while (n-- > 0) {
609       y[4 * (*idx)] += alpha1 * (*v);
610       y[4 * (*idx) + 1] += alpha2 * (*v);
611       y[4 * (*idx) + 2] += alpha3 * (*v);
612       y[4 * (*idx) + 3] += alpha4 * (*v);
613       idx++;
614       v++;
615     }
616   }
617   PetscCall(PetscLogFlops(8.0 * a->nz));
618   PetscCall(VecRestoreArrayRead(xx, &x));
619   PetscCall(VecRestoreArray(zz, &y));
620   PetscFunctionReturn(0);
621 }
622 /* ------------------------------------------------------------------------------*/
623 
624 PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
625   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
626   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
627   const PetscScalar *x, *v;
628   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
629   PetscInt           nonzerorow = 0, n, i, jrow, j;
630   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
631 
632   PetscFunctionBegin;
633   PetscCall(VecGetArrayRead(xx, &x));
634   PetscCall(VecGetArray(yy, &y));
635   idx = a->j;
636   v   = a->a;
637   ii  = a->i;
638 
639   for (i = 0; i < m; i++) {
640     jrow = ii[i];
641     n    = ii[i + 1] - jrow;
642     sum1 = 0.0;
643     sum2 = 0.0;
644     sum3 = 0.0;
645     sum4 = 0.0;
646     sum5 = 0.0;
647 
648     nonzerorow += (n > 0);
649     for (j = 0; j < n; j++) {
650       sum1 += v[jrow] * x[5 * idx[jrow]];
651       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
652       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
653       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
654       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
655       jrow++;
656     }
657     y[5 * i]     = sum1;
658     y[5 * i + 1] = sum2;
659     y[5 * i + 2] = sum3;
660     y[5 * i + 3] = sum4;
661     y[5 * i + 4] = sum5;
662   }
663 
664   PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow));
665   PetscCall(VecRestoreArrayRead(xx, &x));
666   PetscCall(VecRestoreArray(yy, &y));
667   PetscFunctionReturn(0);
668 }
669 
670 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
671   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
672   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
673   const PetscScalar *x, *v;
674   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
675   PetscInt           n, i;
676   const PetscInt     m = b->AIJ->rmap->n, *idx;
677 
678   PetscFunctionBegin;
679   PetscCall(VecSet(yy, 0.0));
680   PetscCall(VecGetArrayRead(xx, &x));
681   PetscCall(VecGetArray(yy, &y));
682 
683   for (i = 0; i < m; i++) {
684     idx    = a->j + a->i[i];
685     v      = a->a + a->i[i];
686     n      = a->i[i + 1] - a->i[i];
687     alpha1 = x[5 * i];
688     alpha2 = x[5 * i + 1];
689     alpha3 = x[5 * i + 2];
690     alpha4 = x[5 * i + 3];
691     alpha5 = x[5 * i + 4];
692     while (n-- > 0) {
693       y[5 * (*idx)] += alpha1 * (*v);
694       y[5 * (*idx) + 1] += alpha2 * (*v);
695       y[5 * (*idx) + 2] += alpha3 * (*v);
696       y[5 * (*idx) + 3] += alpha4 * (*v);
697       y[5 * (*idx) + 4] += alpha5 * (*v);
698       idx++;
699       v++;
700     }
701   }
702   PetscCall(PetscLogFlops(10.0 * a->nz));
703   PetscCall(VecRestoreArrayRead(xx, &x));
704   PetscCall(VecRestoreArray(yy, &y));
705   PetscFunctionReturn(0);
706 }
707 
708 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
709   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
710   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
711   const PetscScalar *x, *v;
712   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
713   PetscInt           n, i, jrow, j;
714   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
715 
716   PetscFunctionBegin;
717   if (yy != zz) PetscCall(VecCopy(yy, zz));
718   PetscCall(VecGetArrayRead(xx, &x));
719   PetscCall(VecGetArray(zz, &y));
720   idx = a->j;
721   v   = a->a;
722   ii  = a->i;
723 
724   for (i = 0; i < m; i++) {
725     jrow = ii[i];
726     n    = ii[i + 1] - jrow;
727     sum1 = 0.0;
728     sum2 = 0.0;
729     sum3 = 0.0;
730     sum4 = 0.0;
731     sum5 = 0.0;
732     for (j = 0; j < n; j++) {
733       sum1 += v[jrow] * x[5 * idx[jrow]];
734       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
735       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
736       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
737       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
738       jrow++;
739     }
740     y[5 * i] += sum1;
741     y[5 * i + 1] += sum2;
742     y[5 * i + 2] += sum3;
743     y[5 * i + 3] += sum4;
744     y[5 * i + 4] += sum5;
745   }
746 
747   PetscCall(PetscLogFlops(10.0 * a->nz));
748   PetscCall(VecRestoreArrayRead(xx, &x));
749   PetscCall(VecRestoreArray(zz, &y));
750   PetscFunctionReturn(0);
751 }
752 
753 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
754   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
755   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
756   const PetscScalar *x, *v;
757   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
758   PetscInt           n, i;
759   const PetscInt     m = b->AIJ->rmap->n, *idx;
760 
761   PetscFunctionBegin;
762   if (yy != zz) PetscCall(VecCopy(yy, zz));
763   PetscCall(VecGetArrayRead(xx, &x));
764   PetscCall(VecGetArray(zz, &y));
765 
766   for (i = 0; i < m; i++) {
767     idx    = a->j + a->i[i];
768     v      = a->a + a->i[i];
769     n      = a->i[i + 1] - a->i[i];
770     alpha1 = x[5 * i];
771     alpha2 = x[5 * i + 1];
772     alpha3 = x[5 * i + 2];
773     alpha4 = x[5 * i + 3];
774     alpha5 = x[5 * i + 4];
775     while (n-- > 0) {
776       y[5 * (*idx)] += alpha1 * (*v);
777       y[5 * (*idx) + 1] += alpha2 * (*v);
778       y[5 * (*idx) + 2] += alpha3 * (*v);
779       y[5 * (*idx) + 3] += alpha4 * (*v);
780       y[5 * (*idx) + 4] += alpha5 * (*v);
781       idx++;
782       v++;
783     }
784   }
785   PetscCall(PetscLogFlops(10.0 * a->nz));
786   PetscCall(VecRestoreArrayRead(xx, &x));
787   PetscCall(VecRestoreArray(zz, &y));
788   PetscFunctionReturn(0);
789 }
790 
791 /* ------------------------------------------------------------------------------*/
792 PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
793   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
794   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
795   const PetscScalar *x, *v;
796   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
797   PetscInt           nonzerorow = 0, n, i, jrow, j;
798   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
799 
800   PetscFunctionBegin;
801   PetscCall(VecGetArrayRead(xx, &x));
802   PetscCall(VecGetArray(yy, &y));
803   idx = a->j;
804   v   = a->a;
805   ii  = a->i;
806 
807   for (i = 0; i < m; i++) {
808     jrow = ii[i];
809     n    = ii[i + 1] - jrow;
810     sum1 = 0.0;
811     sum2 = 0.0;
812     sum3 = 0.0;
813     sum4 = 0.0;
814     sum5 = 0.0;
815     sum6 = 0.0;
816 
817     nonzerorow += (n > 0);
818     for (j = 0; j < n; j++) {
819       sum1 += v[jrow] * x[6 * idx[jrow]];
820       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
821       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
822       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
823       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
824       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
825       jrow++;
826     }
827     y[6 * i]     = sum1;
828     y[6 * i + 1] = sum2;
829     y[6 * i + 2] = sum3;
830     y[6 * i + 3] = sum4;
831     y[6 * i + 4] = sum5;
832     y[6 * i + 5] = sum6;
833   }
834 
835   PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow));
836   PetscCall(VecRestoreArrayRead(xx, &x));
837   PetscCall(VecRestoreArray(yy, &y));
838   PetscFunctionReturn(0);
839 }
840 
841 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
842   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
843   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
844   const PetscScalar *x, *v;
845   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
846   PetscInt           n, i;
847   const PetscInt     m = b->AIJ->rmap->n, *idx;
848 
849   PetscFunctionBegin;
850   PetscCall(VecSet(yy, 0.0));
851   PetscCall(VecGetArrayRead(xx, &x));
852   PetscCall(VecGetArray(yy, &y));
853 
854   for (i = 0; i < m; i++) {
855     idx    = a->j + a->i[i];
856     v      = a->a + a->i[i];
857     n      = a->i[i + 1] - a->i[i];
858     alpha1 = x[6 * i];
859     alpha2 = x[6 * i + 1];
860     alpha3 = x[6 * i + 2];
861     alpha4 = x[6 * i + 3];
862     alpha5 = x[6 * i + 4];
863     alpha6 = x[6 * i + 5];
864     while (n-- > 0) {
865       y[6 * (*idx)] += alpha1 * (*v);
866       y[6 * (*idx) + 1] += alpha2 * (*v);
867       y[6 * (*idx) + 2] += alpha3 * (*v);
868       y[6 * (*idx) + 3] += alpha4 * (*v);
869       y[6 * (*idx) + 4] += alpha5 * (*v);
870       y[6 * (*idx) + 5] += alpha6 * (*v);
871       idx++;
872       v++;
873     }
874   }
875   PetscCall(PetscLogFlops(12.0 * a->nz));
876   PetscCall(VecRestoreArrayRead(xx, &x));
877   PetscCall(VecRestoreArray(yy, &y));
878   PetscFunctionReturn(0);
879 }
880 
881 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
882   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
883   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
884   const PetscScalar *x, *v;
885   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
886   PetscInt           n, i, jrow, j;
887   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
888 
889   PetscFunctionBegin;
890   if (yy != zz) PetscCall(VecCopy(yy, zz));
891   PetscCall(VecGetArrayRead(xx, &x));
892   PetscCall(VecGetArray(zz, &y));
893   idx = a->j;
894   v   = a->a;
895   ii  = a->i;
896 
897   for (i = 0; i < m; i++) {
898     jrow = ii[i];
899     n    = ii[i + 1] - jrow;
900     sum1 = 0.0;
901     sum2 = 0.0;
902     sum3 = 0.0;
903     sum4 = 0.0;
904     sum5 = 0.0;
905     sum6 = 0.0;
906     for (j = 0; j < n; j++) {
907       sum1 += v[jrow] * x[6 * idx[jrow]];
908       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
909       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
910       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
911       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
912       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
913       jrow++;
914     }
915     y[6 * i] += sum1;
916     y[6 * i + 1] += sum2;
917     y[6 * i + 2] += sum3;
918     y[6 * i + 3] += sum4;
919     y[6 * i + 4] += sum5;
920     y[6 * i + 5] += sum6;
921   }
922 
923   PetscCall(PetscLogFlops(12.0 * a->nz));
924   PetscCall(VecRestoreArrayRead(xx, &x));
925   PetscCall(VecRestoreArray(zz, &y));
926   PetscFunctionReturn(0);
927 }
928 
929 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
930   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
931   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
932   const PetscScalar *x, *v;
933   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
934   PetscInt           n, i;
935   const PetscInt     m = b->AIJ->rmap->n, *idx;
936 
937   PetscFunctionBegin;
938   if (yy != zz) PetscCall(VecCopy(yy, zz));
939   PetscCall(VecGetArrayRead(xx, &x));
940   PetscCall(VecGetArray(zz, &y));
941 
942   for (i = 0; i < m; i++) {
943     idx    = a->j + a->i[i];
944     v      = a->a + a->i[i];
945     n      = a->i[i + 1] - a->i[i];
946     alpha1 = x[6 * i];
947     alpha2 = x[6 * i + 1];
948     alpha3 = x[6 * i + 2];
949     alpha4 = x[6 * i + 3];
950     alpha5 = x[6 * i + 4];
951     alpha6 = x[6 * i + 5];
952     while (n-- > 0) {
953       y[6 * (*idx)] += alpha1 * (*v);
954       y[6 * (*idx) + 1] += alpha2 * (*v);
955       y[6 * (*idx) + 2] += alpha3 * (*v);
956       y[6 * (*idx) + 3] += alpha4 * (*v);
957       y[6 * (*idx) + 4] += alpha5 * (*v);
958       y[6 * (*idx) + 5] += alpha6 * (*v);
959       idx++;
960       v++;
961     }
962   }
963   PetscCall(PetscLogFlops(12.0 * a->nz));
964   PetscCall(VecRestoreArrayRead(xx, &x));
965   PetscCall(VecRestoreArray(zz, &y));
966   PetscFunctionReturn(0);
967 }
968 
969 /* ------------------------------------------------------------------------------*/
970 PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
971   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
972   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
973   const PetscScalar *x, *v;
974   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
975   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
976   PetscInt           nonzerorow = 0, n, i, jrow, j;
977 
978   PetscFunctionBegin;
979   PetscCall(VecGetArrayRead(xx, &x));
980   PetscCall(VecGetArray(yy, &y));
981   idx = a->j;
982   v   = a->a;
983   ii  = a->i;
984 
985   for (i = 0; i < m; i++) {
986     jrow = ii[i];
987     n    = ii[i + 1] - jrow;
988     sum1 = 0.0;
989     sum2 = 0.0;
990     sum3 = 0.0;
991     sum4 = 0.0;
992     sum5 = 0.0;
993     sum6 = 0.0;
994     sum7 = 0.0;
995 
996     nonzerorow += (n > 0);
997     for (j = 0; j < n; j++) {
998       sum1 += v[jrow] * x[7 * idx[jrow]];
999       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1000       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1001       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1002       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1003       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1004       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1005       jrow++;
1006     }
1007     y[7 * i]     = sum1;
1008     y[7 * i + 1] = sum2;
1009     y[7 * i + 2] = sum3;
1010     y[7 * i + 3] = sum4;
1011     y[7 * i + 4] = sum5;
1012     y[7 * i + 5] = sum6;
1013     y[7 * i + 6] = sum7;
1014   }
1015 
1016   PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow));
1017   PetscCall(VecRestoreArrayRead(xx, &x));
1018   PetscCall(VecRestoreArray(yy, &y));
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
1023   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1024   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1025   const PetscScalar *x, *v;
1026   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1027   const PetscInt     m = b->AIJ->rmap->n, *idx;
1028   PetscInt           n, i;
1029 
1030   PetscFunctionBegin;
1031   PetscCall(VecSet(yy, 0.0));
1032   PetscCall(VecGetArrayRead(xx, &x));
1033   PetscCall(VecGetArray(yy, &y));
1034 
1035   for (i = 0; i < m; i++) {
1036     idx    = a->j + a->i[i];
1037     v      = a->a + a->i[i];
1038     n      = a->i[i + 1] - a->i[i];
1039     alpha1 = x[7 * i];
1040     alpha2 = x[7 * i + 1];
1041     alpha3 = x[7 * i + 2];
1042     alpha4 = x[7 * i + 3];
1043     alpha5 = x[7 * i + 4];
1044     alpha6 = x[7 * i + 5];
1045     alpha7 = x[7 * i + 6];
1046     while (n-- > 0) {
1047       y[7 * (*idx)] += alpha1 * (*v);
1048       y[7 * (*idx) + 1] += alpha2 * (*v);
1049       y[7 * (*idx) + 2] += alpha3 * (*v);
1050       y[7 * (*idx) + 3] += alpha4 * (*v);
1051       y[7 * (*idx) + 4] += alpha5 * (*v);
1052       y[7 * (*idx) + 5] += alpha6 * (*v);
1053       y[7 * (*idx) + 6] += alpha7 * (*v);
1054       idx++;
1055       v++;
1056     }
1057   }
1058   PetscCall(PetscLogFlops(14.0 * a->nz));
1059   PetscCall(VecRestoreArrayRead(xx, &x));
1060   PetscCall(VecRestoreArray(yy, &y));
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1065   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1066   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1067   const PetscScalar *x, *v;
1068   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1069   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1070   PetscInt           n, i, jrow, j;
1071 
1072   PetscFunctionBegin;
1073   if (yy != zz) PetscCall(VecCopy(yy, zz));
1074   PetscCall(VecGetArrayRead(xx, &x));
1075   PetscCall(VecGetArray(zz, &y));
1076   idx = a->j;
1077   v   = a->a;
1078   ii  = a->i;
1079 
1080   for (i = 0; i < m; i++) {
1081     jrow = ii[i];
1082     n    = ii[i + 1] - jrow;
1083     sum1 = 0.0;
1084     sum2 = 0.0;
1085     sum3 = 0.0;
1086     sum4 = 0.0;
1087     sum5 = 0.0;
1088     sum6 = 0.0;
1089     sum7 = 0.0;
1090     for (j = 0; j < n; j++) {
1091       sum1 += v[jrow] * x[7 * idx[jrow]];
1092       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1093       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1094       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1095       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1096       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1097       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1098       jrow++;
1099     }
1100     y[7 * i] += sum1;
1101     y[7 * i + 1] += sum2;
1102     y[7 * i + 2] += sum3;
1103     y[7 * i + 3] += sum4;
1104     y[7 * i + 4] += sum5;
1105     y[7 * i + 5] += sum6;
1106     y[7 * i + 6] += sum7;
1107   }
1108 
1109   PetscCall(PetscLogFlops(14.0 * a->nz));
1110   PetscCall(VecRestoreArrayRead(xx, &x));
1111   PetscCall(VecRestoreArray(zz, &y));
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1116   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1117   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1118   const PetscScalar *x, *v;
1119   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1120   const PetscInt     m = b->AIJ->rmap->n, *idx;
1121   PetscInt           n, i;
1122 
1123   PetscFunctionBegin;
1124   if (yy != zz) PetscCall(VecCopy(yy, zz));
1125   PetscCall(VecGetArrayRead(xx, &x));
1126   PetscCall(VecGetArray(zz, &y));
1127   for (i = 0; i < m; i++) {
1128     idx    = a->j + a->i[i];
1129     v      = a->a + a->i[i];
1130     n      = a->i[i + 1] - a->i[i];
1131     alpha1 = x[7 * i];
1132     alpha2 = x[7 * i + 1];
1133     alpha3 = x[7 * i + 2];
1134     alpha4 = x[7 * i + 3];
1135     alpha5 = x[7 * i + 4];
1136     alpha6 = x[7 * i + 5];
1137     alpha7 = x[7 * i + 6];
1138     while (n-- > 0) {
1139       y[7 * (*idx)] += alpha1 * (*v);
1140       y[7 * (*idx) + 1] += alpha2 * (*v);
1141       y[7 * (*idx) + 2] += alpha3 * (*v);
1142       y[7 * (*idx) + 3] += alpha4 * (*v);
1143       y[7 * (*idx) + 4] += alpha5 * (*v);
1144       y[7 * (*idx) + 5] += alpha6 * (*v);
1145       y[7 * (*idx) + 6] += alpha7 * (*v);
1146       idx++;
1147       v++;
1148     }
1149   }
1150   PetscCall(PetscLogFlops(14.0 * a->nz));
1151   PetscCall(VecRestoreArrayRead(xx, &x));
1152   PetscCall(VecRestoreArray(zz, &y));
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
1157   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1158   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1159   const PetscScalar *x, *v;
1160   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1161   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1162   PetscInt           nonzerorow = 0, n, i, jrow, j;
1163 
1164   PetscFunctionBegin;
1165   PetscCall(VecGetArrayRead(xx, &x));
1166   PetscCall(VecGetArray(yy, &y));
1167   idx = a->j;
1168   v   = a->a;
1169   ii  = a->i;
1170 
1171   for (i = 0; i < m; i++) {
1172     jrow = ii[i];
1173     n    = ii[i + 1] - jrow;
1174     sum1 = 0.0;
1175     sum2 = 0.0;
1176     sum3 = 0.0;
1177     sum4 = 0.0;
1178     sum5 = 0.0;
1179     sum6 = 0.0;
1180     sum7 = 0.0;
1181     sum8 = 0.0;
1182 
1183     nonzerorow += (n > 0);
1184     for (j = 0; j < n; j++) {
1185       sum1 += v[jrow] * x[8 * idx[jrow]];
1186       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1187       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1188       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1189       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1190       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1191       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1192       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1193       jrow++;
1194     }
1195     y[8 * i]     = sum1;
1196     y[8 * i + 1] = sum2;
1197     y[8 * i + 2] = sum3;
1198     y[8 * i + 3] = sum4;
1199     y[8 * i + 4] = sum5;
1200     y[8 * i + 5] = sum6;
1201     y[8 * i + 6] = sum7;
1202     y[8 * i + 7] = sum8;
1203   }
1204 
1205   PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow));
1206   PetscCall(VecRestoreArrayRead(xx, &x));
1207   PetscCall(VecRestoreArray(yy, &y));
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
1212   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1213   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1214   const PetscScalar *x, *v;
1215   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1216   const PetscInt     m = b->AIJ->rmap->n, *idx;
1217   PetscInt           n, i;
1218 
1219   PetscFunctionBegin;
1220   PetscCall(VecSet(yy, 0.0));
1221   PetscCall(VecGetArrayRead(xx, &x));
1222   PetscCall(VecGetArray(yy, &y));
1223 
1224   for (i = 0; i < m; i++) {
1225     idx    = a->j + a->i[i];
1226     v      = a->a + a->i[i];
1227     n      = a->i[i + 1] - a->i[i];
1228     alpha1 = x[8 * i];
1229     alpha2 = x[8 * i + 1];
1230     alpha3 = x[8 * i + 2];
1231     alpha4 = x[8 * i + 3];
1232     alpha5 = x[8 * i + 4];
1233     alpha6 = x[8 * i + 5];
1234     alpha7 = x[8 * i + 6];
1235     alpha8 = x[8 * i + 7];
1236     while (n-- > 0) {
1237       y[8 * (*idx)] += alpha1 * (*v);
1238       y[8 * (*idx) + 1] += alpha2 * (*v);
1239       y[8 * (*idx) + 2] += alpha3 * (*v);
1240       y[8 * (*idx) + 3] += alpha4 * (*v);
1241       y[8 * (*idx) + 4] += alpha5 * (*v);
1242       y[8 * (*idx) + 5] += alpha6 * (*v);
1243       y[8 * (*idx) + 6] += alpha7 * (*v);
1244       y[8 * (*idx) + 7] += alpha8 * (*v);
1245       idx++;
1246       v++;
1247     }
1248   }
1249   PetscCall(PetscLogFlops(16.0 * a->nz));
1250   PetscCall(VecRestoreArrayRead(xx, &x));
1251   PetscCall(VecRestoreArray(yy, &y));
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
1256   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1257   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1258   const PetscScalar *x, *v;
1259   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1260   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1261   PetscInt           n, i, jrow, j;
1262 
1263   PetscFunctionBegin;
1264   if (yy != zz) PetscCall(VecCopy(yy, zz));
1265   PetscCall(VecGetArrayRead(xx, &x));
1266   PetscCall(VecGetArray(zz, &y));
1267   idx = a->j;
1268   v   = a->a;
1269   ii  = a->i;
1270 
1271   for (i = 0; i < m; i++) {
1272     jrow = ii[i];
1273     n    = ii[i + 1] - jrow;
1274     sum1 = 0.0;
1275     sum2 = 0.0;
1276     sum3 = 0.0;
1277     sum4 = 0.0;
1278     sum5 = 0.0;
1279     sum6 = 0.0;
1280     sum7 = 0.0;
1281     sum8 = 0.0;
1282     for (j = 0; j < n; j++) {
1283       sum1 += v[jrow] * x[8 * idx[jrow]];
1284       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1285       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1286       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1287       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1288       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1289       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1290       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1291       jrow++;
1292     }
1293     y[8 * i] += sum1;
1294     y[8 * i + 1] += sum2;
1295     y[8 * i + 2] += sum3;
1296     y[8 * i + 3] += sum4;
1297     y[8 * i + 4] += sum5;
1298     y[8 * i + 5] += sum6;
1299     y[8 * i + 6] += sum7;
1300     y[8 * i + 7] += sum8;
1301   }
1302 
1303   PetscCall(PetscLogFlops(16.0 * a->nz));
1304   PetscCall(VecRestoreArrayRead(xx, &x));
1305   PetscCall(VecRestoreArray(zz, &y));
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
1310   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1311   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1312   const PetscScalar *x, *v;
1313   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1314   const PetscInt     m = b->AIJ->rmap->n, *idx;
1315   PetscInt           n, i;
1316 
1317   PetscFunctionBegin;
1318   if (yy != zz) PetscCall(VecCopy(yy, zz));
1319   PetscCall(VecGetArrayRead(xx, &x));
1320   PetscCall(VecGetArray(zz, &y));
1321   for (i = 0; i < m; i++) {
1322     idx    = a->j + a->i[i];
1323     v      = a->a + a->i[i];
1324     n      = a->i[i + 1] - a->i[i];
1325     alpha1 = x[8 * i];
1326     alpha2 = x[8 * i + 1];
1327     alpha3 = x[8 * i + 2];
1328     alpha4 = x[8 * i + 3];
1329     alpha5 = x[8 * i + 4];
1330     alpha6 = x[8 * i + 5];
1331     alpha7 = x[8 * i + 6];
1332     alpha8 = x[8 * i + 7];
1333     while (n-- > 0) {
1334       y[8 * (*idx)] += alpha1 * (*v);
1335       y[8 * (*idx) + 1] += alpha2 * (*v);
1336       y[8 * (*idx) + 2] += alpha3 * (*v);
1337       y[8 * (*idx) + 3] += alpha4 * (*v);
1338       y[8 * (*idx) + 4] += alpha5 * (*v);
1339       y[8 * (*idx) + 5] += alpha6 * (*v);
1340       y[8 * (*idx) + 6] += alpha7 * (*v);
1341       y[8 * (*idx) + 7] += alpha8 * (*v);
1342       idx++;
1343       v++;
1344     }
1345   }
1346   PetscCall(PetscLogFlops(16.0 * a->nz));
1347   PetscCall(VecRestoreArrayRead(xx, &x));
1348   PetscCall(VecRestoreArray(zz, &y));
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 /* ------------------------------------------------------------------------------*/
1353 PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
1354   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1355   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1356   const PetscScalar *x, *v;
1357   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1358   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1359   PetscInt           nonzerorow = 0, n, i, jrow, j;
1360 
1361   PetscFunctionBegin;
1362   PetscCall(VecGetArrayRead(xx, &x));
1363   PetscCall(VecGetArray(yy, &y));
1364   idx = a->j;
1365   v   = a->a;
1366   ii  = a->i;
1367 
1368   for (i = 0; i < m; i++) {
1369     jrow = ii[i];
1370     n    = ii[i + 1] - jrow;
1371     sum1 = 0.0;
1372     sum2 = 0.0;
1373     sum3 = 0.0;
1374     sum4 = 0.0;
1375     sum5 = 0.0;
1376     sum6 = 0.0;
1377     sum7 = 0.0;
1378     sum8 = 0.0;
1379     sum9 = 0.0;
1380 
1381     nonzerorow += (n > 0);
1382     for (j = 0; j < n; j++) {
1383       sum1 += v[jrow] * x[9 * idx[jrow]];
1384       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1385       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1386       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1387       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1388       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1389       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1390       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1391       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1392       jrow++;
1393     }
1394     y[9 * i]     = sum1;
1395     y[9 * i + 1] = sum2;
1396     y[9 * i + 2] = sum3;
1397     y[9 * i + 3] = sum4;
1398     y[9 * i + 4] = sum5;
1399     y[9 * i + 5] = sum6;
1400     y[9 * i + 6] = sum7;
1401     y[9 * i + 7] = sum8;
1402     y[9 * i + 8] = sum9;
1403   }
1404 
1405   PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow));
1406   PetscCall(VecRestoreArrayRead(xx, &x));
1407   PetscCall(VecRestoreArray(yy, &y));
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 /* ------------------------------------------------------------------------------*/
1412 
1413 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
1414   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1415   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1416   const PetscScalar *x, *v;
1417   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1418   const PetscInt     m = b->AIJ->rmap->n, *idx;
1419   PetscInt           n, i;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(VecSet(yy, 0.0));
1423   PetscCall(VecGetArrayRead(xx, &x));
1424   PetscCall(VecGetArray(yy, &y));
1425 
1426   for (i = 0; i < m; i++) {
1427     idx    = a->j + a->i[i];
1428     v      = a->a + a->i[i];
1429     n      = a->i[i + 1] - a->i[i];
1430     alpha1 = x[9 * i];
1431     alpha2 = x[9 * i + 1];
1432     alpha3 = x[9 * i + 2];
1433     alpha4 = x[9 * i + 3];
1434     alpha5 = x[9 * i + 4];
1435     alpha6 = x[9 * i + 5];
1436     alpha7 = x[9 * i + 6];
1437     alpha8 = x[9 * i + 7];
1438     alpha9 = x[9 * i + 8];
1439     while (n-- > 0) {
1440       y[9 * (*idx)] += alpha1 * (*v);
1441       y[9 * (*idx) + 1] += alpha2 * (*v);
1442       y[9 * (*idx) + 2] += alpha3 * (*v);
1443       y[9 * (*idx) + 3] += alpha4 * (*v);
1444       y[9 * (*idx) + 4] += alpha5 * (*v);
1445       y[9 * (*idx) + 5] += alpha6 * (*v);
1446       y[9 * (*idx) + 6] += alpha7 * (*v);
1447       y[9 * (*idx) + 7] += alpha8 * (*v);
1448       y[9 * (*idx) + 8] += alpha9 * (*v);
1449       idx++;
1450       v++;
1451     }
1452   }
1453   PetscCall(PetscLogFlops(18.0 * a->nz));
1454   PetscCall(VecRestoreArrayRead(xx, &x));
1455   PetscCall(VecRestoreArray(yy, &y));
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
1460   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1461   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1462   const PetscScalar *x, *v;
1463   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1464   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1465   PetscInt           n, i, jrow, j;
1466 
1467   PetscFunctionBegin;
1468   if (yy != zz) PetscCall(VecCopy(yy, zz));
1469   PetscCall(VecGetArrayRead(xx, &x));
1470   PetscCall(VecGetArray(zz, &y));
1471   idx = a->j;
1472   v   = a->a;
1473   ii  = a->i;
1474 
1475   for (i = 0; i < m; i++) {
1476     jrow = ii[i];
1477     n    = ii[i + 1] - jrow;
1478     sum1 = 0.0;
1479     sum2 = 0.0;
1480     sum3 = 0.0;
1481     sum4 = 0.0;
1482     sum5 = 0.0;
1483     sum6 = 0.0;
1484     sum7 = 0.0;
1485     sum8 = 0.0;
1486     sum9 = 0.0;
1487     for (j = 0; j < n; j++) {
1488       sum1 += v[jrow] * x[9 * idx[jrow]];
1489       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1490       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1491       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1492       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1493       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1494       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1495       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1496       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1497       jrow++;
1498     }
1499     y[9 * i] += sum1;
1500     y[9 * i + 1] += sum2;
1501     y[9 * i + 2] += sum3;
1502     y[9 * i + 3] += sum4;
1503     y[9 * i + 4] += sum5;
1504     y[9 * i + 5] += sum6;
1505     y[9 * i + 6] += sum7;
1506     y[9 * i + 7] += sum8;
1507     y[9 * i + 8] += sum9;
1508   }
1509 
1510   PetscCall(PetscLogFlops(18.0 * a->nz));
1511   PetscCall(VecRestoreArrayRead(xx, &x));
1512   PetscCall(VecRestoreArray(zz, &y));
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
1517   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1518   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1519   const PetscScalar *x, *v;
1520   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1521   const PetscInt     m = b->AIJ->rmap->n, *idx;
1522   PetscInt           n, i;
1523 
1524   PetscFunctionBegin;
1525   if (yy != zz) PetscCall(VecCopy(yy, zz));
1526   PetscCall(VecGetArrayRead(xx, &x));
1527   PetscCall(VecGetArray(zz, &y));
1528   for (i = 0; i < m; i++) {
1529     idx    = a->j + a->i[i];
1530     v      = a->a + a->i[i];
1531     n      = a->i[i + 1] - a->i[i];
1532     alpha1 = x[9 * i];
1533     alpha2 = x[9 * i + 1];
1534     alpha3 = x[9 * i + 2];
1535     alpha4 = x[9 * i + 3];
1536     alpha5 = x[9 * i + 4];
1537     alpha6 = x[9 * i + 5];
1538     alpha7 = x[9 * i + 6];
1539     alpha8 = x[9 * i + 7];
1540     alpha9 = x[9 * i + 8];
1541     while (n-- > 0) {
1542       y[9 * (*idx)] += alpha1 * (*v);
1543       y[9 * (*idx) + 1] += alpha2 * (*v);
1544       y[9 * (*idx) + 2] += alpha3 * (*v);
1545       y[9 * (*idx) + 3] += alpha4 * (*v);
1546       y[9 * (*idx) + 4] += alpha5 * (*v);
1547       y[9 * (*idx) + 5] += alpha6 * (*v);
1548       y[9 * (*idx) + 6] += alpha7 * (*v);
1549       y[9 * (*idx) + 7] += alpha8 * (*v);
1550       y[9 * (*idx) + 8] += alpha9 * (*v);
1551       idx++;
1552       v++;
1553     }
1554   }
1555   PetscCall(PetscLogFlops(18.0 * a->nz));
1556   PetscCall(VecRestoreArrayRead(xx, &x));
1557   PetscCall(VecRestoreArray(zz, &y));
1558   PetscFunctionReturn(0);
1559 }
1560 PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1561   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1562   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1563   const PetscScalar *x, *v;
1564   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1565   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1566   PetscInt           nonzerorow = 0, n, i, jrow, j;
1567 
1568   PetscFunctionBegin;
1569   PetscCall(VecGetArrayRead(xx, &x));
1570   PetscCall(VecGetArray(yy, &y));
1571   idx = a->j;
1572   v   = a->a;
1573   ii  = a->i;
1574 
1575   for (i = 0; i < m; i++) {
1576     jrow  = ii[i];
1577     n     = ii[i + 1] - jrow;
1578     sum1  = 0.0;
1579     sum2  = 0.0;
1580     sum3  = 0.0;
1581     sum4  = 0.0;
1582     sum5  = 0.0;
1583     sum6  = 0.0;
1584     sum7  = 0.0;
1585     sum8  = 0.0;
1586     sum9  = 0.0;
1587     sum10 = 0.0;
1588 
1589     nonzerorow += (n > 0);
1590     for (j = 0; j < n; j++) {
1591       sum1 += v[jrow] * x[10 * idx[jrow]];
1592       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1593       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1594       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1595       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1596       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1597       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1598       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1599       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1600       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1601       jrow++;
1602     }
1603     y[10 * i]     = sum1;
1604     y[10 * i + 1] = sum2;
1605     y[10 * i + 2] = sum3;
1606     y[10 * i + 3] = sum4;
1607     y[10 * i + 4] = sum5;
1608     y[10 * i + 5] = sum6;
1609     y[10 * i + 6] = sum7;
1610     y[10 * i + 7] = sum8;
1611     y[10 * i + 8] = sum9;
1612     y[10 * i + 9] = sum10;
1613   }
1614 
1615   PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow));
1616   PetscCall(VecRestoreArrayRead(xx, &x));
1617   PetscCall(VecRestoreArray(yy, &y));
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1622   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1623   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1624   const PetscScalar *x, *v;
1625   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1627   PetscInt           n, i, jrow, j;
1628 
1629   PetscFunctionBegin;
1630   if (yy != zz) PetscCall(VecCopy(yy, zz));
1631   PetscCall(VecGetArrayRead(xx, &x));
1632   PetscCall(VecGetArray(zz, &y));
1633   idx = a->j;
1634   v   = a->a;
1635   ii  = a->i;
1636 
1637   for (i = 0; i < m; i++) {
1638     jrow  = ii[i];
1639     n     = ii[i + 1] - jrow;
1640     sum1  = 0.0;
1641     sum2  = 0.0;
1642     sum3  = 0.0;
1643     sum4  = 0.0;
1644     sum5  = 0.0;
1645     sum6  = 0.0;
1646     sum7  = 0.0;
1647     sum8  = 0.0;
1648     sum9  = 0.0;
1649     sum10 = 0.0;
1650     for (j = 0; j < n; j++) {
1651       sum1 += v[jrow] * x[10 * idx[jrow]];
1652       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1653       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1654       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1655       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1656       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1657       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1658       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1659       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1660       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1661       jrow++;
1662     }
1663     y[10 * i] += sum1;
1664     y[10 * i + 1] += sum2;
1665     y[10 * i + 2] += sum3;
1666     y[10 * i + 3] += sum4;
1667     y[10 * i + 4] += sum5;
1668     y[10 * i + 5] += sum6;
1669     y[10 * i + 6] += sum7;
1670     y[10 * i + 7] += sum8;
1671     y[10 * i + 8] += sum9;
1672     y[10 * i + 9] += sum10;
1673   }
1674 
1675   PetscCall(PetscLogFlops(20.0 * a->nz));
1676   PetscCall(VecRestoreArrayRead(xx, &x));
1677   PetscCall(VecRestoreArray(yy, &y));
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1682   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1683   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1684   const PetscScalar *x, *v;
1685   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1686   const PetscInt     m = b->AIJ->rmap->n, *idx;
1687   PetscInt           n, i;
1688 
1689   PetscFunctionBegin;
1690   PetscCall(VecSet(yy, 0.0));
1691   PetscCall(VecGetArrayRead(xx, &x));
1692   PetscCall(VecGetArray(yy, &y));
1693 
1694   for (i = 0; i < m; i++) {
1695     idx     = a->j + a->i[i];
1696     v       = a->a + a->i[i];
1697     n       = a->i[i + 1] - a->i[i];
1698     alpha1  = x[10 * i];
1699     alpha2  = x[10 * i + 1];
1700     alpha3  = x[10 * i + 2];
1701     alpha4  = x[10 * i + 3];
1702     alpha5  = x[10 * i + 4];
1703     alpha6  = x[10 * i + 5];
1704     alpha7  = x[10 * i + 6];
1705     alpha8  = x[10 * i + 7];
1706     alpha9  = x[10 * i + 8];
1707     alpha10 = x[10 * i + 9];
1708     while (n-- > 0) {
1709       y[10 * (*idx)] += alpha1 * (*v);
1710       y[10 * (*idx) + 1] += alpha2 * (*v);
1711       y[10 * (*idx) + 2] += alpha3 * (*v);
1712       y[10 * (*idx) + 3] += alpha4 * (*v);
1713       y[10 * (*idx) + 4] += alpha5 * (*v);
1714       y[10 * (*idx) + 5] += alpha6 * (*v);
1715       y[10 * (*idx) + 6] += alpha7 * (*v);
1716       y[10 * (*idx) + 7] += alpha8 * (*v);
1717       y[10 * (*idx) + 8] += alpha9 * (*v);
1718       y[10 * (*idx) + 9] += alpha10 * (*v);
1719       idx++;
1720       v++;
1721     }
1722   }
1723   PetscCall(PetscLogFlops(20.0 * a->nz));
1724   PetscCall(VecRestoreArrayRead(xx, &x));
1725   PetscCall(VecRestoreArray(yy, &y));
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1730   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1731   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1732   const PetscScalar *x, *v;
1733   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1734   const PetscInt     m = b->AIJ->rmap->n, *idx;
1735   PetscInt           n, i;
1736 
1737   PetscFunctionBegin;
1738   if (yy != zz) PetscCall(VecCopy(yy, zz));
1739   PetscCall(VecGetArrayRead(xx, &x));
1740   PetscCall(VecGetArray(zz, &y));
1741   for (i = 0; i < m; i++) {
1742     idx     = a->j + a->i[i];
1743     v       = a->a + a->i[i];
1744     n       = a->i[i + 1] - a->i[i];
1745     alpha1  = x[10 * i];
1746     alpha2  = x[10 * i + 1];
1747     alpha3  = x[10 * i + 2];
1748     alpha4  = x[10 * i + 3];
1749     alpha5  = x[10 * i + 4];
1750     alpha6  = x[10 * i + 5];
1751     alpha7  = x[10 * i + 6];
1752     alpha8  = x[10 * i + 7];
1753     alpha9  = x[10 * i + 8];
1754     alpha10 = x[10 * i + 9];
1755     while (n-- > 0) {
1756       y[10 * (*idx)] += alpha1 * (*v);
1757       y[10 * (*idx) + 1] += alpha2 * (*v);
1758       y[10 * (*idx) + 2] += alpha3 * (*v);
1759       y[10 * (*idx) + 3] += alpha4 * (*v);
1760       y[10 * (*idx) + 4] += alpha5 * (*v);
1761       y[10 * (*idx) + 5] += alpha6 * (*v);
1762       y[10 * (*idx) + 6] += alpha7 * (*v);
1763       y[10 * (*idx) + 7] += alpha8 * (*v);
1764       y[10 * (*idx) + 8] += alpha9 * (*v);
1765       y[10 * (*idx) + 9] += alpha10 * (*v);
1766       idx++;
1767       v++;
1768     }
1769   }
1770   PetscCall(PetscLogFlops(20.0 * a->nz));
1771   PetscCall(VecRestoreArrayRead(xx, &x));
1772   PetscCall(VecRestoreArray(zz, &y));
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 /*--------------------------------------------------------------------------------------------*/
1777 PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1778   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1779   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1780   const PetscScalar *x, *v;
1781   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1782   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1783   PetscInt           nonzerorow = 0, n, i, jrow, j;
1784 
1785   PetscFunctionBegin;
1786   PetscCall(VecGetArrayRead(xx, &x));
1787   PetscCall(VecGetArray(yy, &y));
1788   idx = a->j;
1789   v   = a->a;
1790   ii  = a->i;
1791 
1792   for (i = 0; i < m; i++) {
1793     jrow  = ii[i];
1794     n     = ii[i + 1] - jrow;
1795     sum1  = 0.0;
1796     sum2  = 0.0;
1797     sum3  = 0.0;
1798     sum4  = 0.0;
1799     sum5  = 0.0;
1800     sum6  = 0.0;
1801     sum7  = 0.0;
1802     sum8  = 0.0;
1803     sum9  = 0.0;
1804     sum10 = 0.0;
1805     sum11 = 0.0;
1806 
1807     nonzerorow += (n > 0);
1808     for (j = 0; j < n; j++) {
1809       sum1 += v[jrow] * x[11 * idx[jrow]];
1810       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1811       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1812       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1813       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1814       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1815       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1816       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1817       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1818       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1819       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1820       jrow++;
1821     }
1822     y[11 * i]      = sum1;
1823     y[11 * i + 1]  = sum2;
1824     y[11 * i + 2]  = sum3;
1825     y[11 * i + 3]  = sum4;
1826     y[11 * i + 4]  = sum5;
1827     y[11 * i + 5]  = sum6;
1828     y[11 * i + 6]  = sum7;
1829     y[11 * i + 7]  = sum8;
1830     y[11 * i + 8]  = sum9;
1831     y[11 * i + 9]  = sum10;
1832     y[11 * i + 10] = sum11;
1833   }
1834 
1835   PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow));
1836   PetscCall(VecRestoreArrayRead(xx, &x));
1837   PetscCall(VecRestoreArray(yy, &y));
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1842   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1843   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1844   const PetscScalar *x, *v;
1845   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1846   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1847   PetscInt           n, i, jrow, j;
1848 
1849   PetscFunctionBegin;
1850   if (yy != zz) PetscCall(VecCopy(yy, zz));
1851   PetscCall(VecGetArrayRead(xx, &x));
1852   PetscCall(VecGetArray(zz, &y));
1853   idx = a->j;
1854   v   = a->a;
1855   ii  = a->i;
1856 
1857   for (i = 0; i < m; i++) {
1858     jrow  = ii[i];
1859     n     = ii[i + 1] - jrow;
1860     sum1  = 0.0;
1861     sum2  = 0.0;
1862     sum3  = 0.0;
1863     sum4  = 0.0;
1864     sum5  = 0.0;
1865     sum6  = 0.0;
1866     sum7  = 0.0;
1867     sum8  = 0.0;
1868     sum9  = 0.0;
1869     sum10 = 0.0;
1870     sum11 = 0.0;
1871     for (j = 0; j < n; j++) {
1872       sum1 += v[jrow] * x[11 * idx[jrow]];
1873       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1874       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1875       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1876       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1877       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1878       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1879       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1880       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1881       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1882       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1883       jrow++;
1884     }
1885     y[11 * i] += sum1;
1886     y[11 * i + 1] += sum2;
1887     y[11 * i + 2] += sum3;
1888     y[11 * i + 3] += sum4;
1889     y[11 * i + 4] += sum5;
1890     y[11 * i + 5] += sum6;
1891     y[11 * i + 6] += sum7;
1892     y[11 * i + 7] += sum8;
1893     y[11 * i + 8] += sum9;
1894     y[11 * i + 9] += sum10;
1895     y[11 * i + 10] += sum11;
1896   }
1897 
1898   PetscCall(PetscLogFlops(22.0 * a->nz));
1899   PetscCall(VecRestoreArrayRead(xx, &x));
1900   PetscCall(VecRestoreArray(yy, &y));
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1905   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1906   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1907   const PetscScalar *x, *v;
1908   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1909   const PetscInt     m = b->AIJ->rmap->n, *idx;
1910   PetscInt           n, i;
1911 
1912   PetscFunctionBegin;
1913   PetscCall(VecSet(yy, 0.0));
1914   PetscCall(VecGetArrayRead(xx, &x));
1915   PetscCall(VecGetArray(yy, &y));
1916 
1917   for (i = 0; i < m; i++) {
1918     idx     = a->j + a->i[i];
1919     v       = a->a + a->i[i];
1920     n       = a->i[i + 1] - a->i[i];
1921     alpha1  = x[11 * i];
1922     alpha2  = x[11 * i + 1];
1923     alpha3  = x[11 * i + 2];
1924     alpha4  = x[11 * i + 3];
1925     alpha5  = x[11 * i + 4];
1926     alpha6  = x[11 * i + 5];
1927     alpha7  = x[11 * i + 6];
1928     alpha8  = x[11 * i + 7];
1929     alpha9  = x[11 * i + 8];
1930     alpha10 = x[11 * i + 9];
1931     alpha11 = x[11 * i + 10];
1932     while (n-- > 0) {
1933       y[11 * (*idx)] += alpha1 * (*v);
1934       y[11 * (*idx) + 1] += alpha2 * (*v);
1935       y[11 * (*idx) + 2] += alpha3 * (*v);
1936       y[11 * (*idx) + 3] += alpha4 * (*v);
1937       y[11 * (*idx) + 4] += alpha5 * (*v);
1938       y[11 * (*idx) + 5] += alpha6 * (*v);
1939       y[11 * (*idx) + 6] += alpha7 * (*v);
1940       y[11 * (*idx) + 7] += alpha8 * (*v);
1941       y[11 * (*idx) + 8] += alpha9 * (*v);
1942       y[11 * (*idx) + 9] += alpha10 * (*v);
1943       y[11 * (*idx) + 10] += alpha11 * (*v);
1944       idx++;
1945       v++;
1946     }
1947   }
1948   PetscCall(PetscLogFlops(22.0 * a->nz));
1949   PetscCall(VecRestoreArrayRead(xx, &x));
1950   PetscCall(VecRestoreArray(yy, &y));
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1955   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1956   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1957   const PetscScalar *x, *v;
1958   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1959   const PetscInt     m = b->AIJ->rmap->n, *idx;
1960   PetscInt           n, i;
1961 
1962   PetscFunctionBegin;
1963   if (yy != zz) PetscCall(VecCopy(yy, zz));
1964   PetscCall(VecGetArrayRead(xx, &x));
1965   PetscCall(VecGetArray(zz, &y));
1966   for (i = 0; i < m; i++) {
1967     idx     = a->j + a->i[i];
1968     v       = a->a + a->i[i];
1969     n       = a->i[i + 1] - a->i[i];
1970     alpha1  = x[11 * i];
1971     alpha2  = x[11 * i + 1];
1972     alpha3  = x[11 * i + 2];
1973     alpha4  = x[11 * i + 3];
1974     alpha5  = x[11 * i + 4];
1975     alpha6  = x[11 * i + 5];
1976     alpha7  = x[11 * i + 6];
1977     alpha8  = x[11 * i + 7];
1978     alpha9  = x[11 * i + 8];
1979     alpha10 = x[11 * i + 9];
1980     alpha11 = x[11 * i + 10];
1981     while (n-- > 0) {
1982       y[11 * (*idx)] += alpha1 * (*v);
1983       y[11 * (*idx) + 1] += alpha2 * (*v);
1984       y[11 * (*idx) + 2] += alpha3 * (*v);
1985       y[11 * (*idx) + 3] += alpha4 * (*v);
1986       y[11 * (*idx) + 4] += alpha5 * (*v);
1987       y[11 * (*idx) + 5] += alpha6 * (*v);
1988       y[11 * (*idx) + 6] += alpha7 * (*v);
1989       y[11 * (*idx) + 7] += alpha8 * (*v);
1990       y[11 * (*idx) + 8] += alpha9 * (*v);
1991       y[11 * (*idx) + 9] += alpha10 * (*v);
1992       y[11 * (*idx) + 10] += alpha11 * (*v);
1993       idx++;
1994       v++;
1995     }
1996   }
1997   PetscCall(PetscLogFlops(22.0 * a->nz));
1998   PetscCall(VecRestoreArrayRead(xx, &x));
1999   PetscCall(VecRestoreArray(zz, &y));
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 /*--------------------------------------------------------------------------------------------*/
2004 PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
2005   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2006   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2007   const PetscScalar *x, *v;
2008   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2009   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2010   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2011   PetscInt           nonzerorow = 0, n, i, jrow, j;
2012 
2013   PetscFunctionBegin;
2014   PetscCall(VecGetArrayRead(xx, &x));
2015   PetscCall(VecGetArray(yy, &y));
2016   idx = a->j;
2017   v   = a->a;
2018   ii  = a->i;
2019 
2020   for (i = 0; i < m; i++) {
2021     jrow  = ii[i];
2022     n     = ii[i + 1] - jrow;
2023     sum1  = 0.0;
2024     sum2  = 0.0;
2025     sum3  = 0.0;
2026     sum4  = 0.0;
2027     sum5  = 0.0;
2028     sum6  = 0.0;
2029     sum7  = 0.0;
2030     sum8  = 0.0;
2031     sum9  = 0.0;
2032     sum10 = 0.0;
2033     sum11 = 0.0;
2034     sum12 = 0.0;
2035     sum13 = 0.0;
2036     sum14 = 0.0;
2037     sum15 = 0.0;
2038     sum16 = 0.0;
2039 
2040     nonzerorow += (n > 0);
2041     for (j = 0; j < n; j++) {
2042       sum1 += v[jrow] * x[16 * idx[jrow]];
2043       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2044       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2045       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2046       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2047       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2048       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2049       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2050       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2051       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2052       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2053       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2054       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2055       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2056       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2057       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2058       jrow++;
2059     }
2060     y[16 * i]      = sum1;
2061     y[16 * i + 1]  = sum2;
2062     y[16 * i + 2]  = sum3;
2063     y[16 * i + 3]  = sum4;
2064     y[16 * i + 4]  = sum5;
2065     y[16 * i + 5]  = sum6;
2066     y[16 * i + 6]  = sum7;
2067     y[16 * i + 7]  = sum8;
2068     y[16 * i + 8]  = sum9;
2069     y[16 * i + 9]  = sum10;
2070     y[16 * i + 10] = sum11;
2071     y[16 * i + 11] = sum12;
2072     y[16 * i + 12] = sum13;
2073     y[16 * i + 13] = sum14;
2074     y[16 * i + 14] = sum15;
2075     y[16 * i + 15] = sum16;
2076   }
2077 
2078   PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow));
2079   PetscCall(VecRestoreArrayRead(xx, &x));
2080   PetscCall(VecRestoreArray(yy, &y));
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
2085   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2086   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2087   const PetscScalar *x, *v;
2088   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2089   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2090   const PetscInt     m = b->AIJ->rmap->n, *idx;
2091   PetscInt           n, i;
2092 
2093   PetscFunctionBegin;
2094   PetscCall(VecSet(yy, 0.0));
2095   PetscCall(VecGetArrayRead(xx, &x));
2096   PetscCall(VecGetArray(yy, &y));
2097 
2098   for (i = 0; i < m; i++) {
2099     idx     = a->j + a->i[i];
2100     v       = a->a + a->i[i];
2101     n       = a->i[i + 1] - a->i[i];
2102     alpha1  = x[16 * i];
2103     alpha2  = x[16 * i + 1];
2104     alpha3  = x[16 * i + 2];
2105     alpha4  = x[16 * i + 3];
2106     alpha5  = x[16 * i + 4];
2107     alpha6  = x[16 * i + 5];
2108     alpha7  = x[16 * i + 6];
2109     alpha8  = x[16 * i + 7];
2110     alpha9  = x[16 * i + 8];
2111     alpha10 = x[16 * i + 9];
2112     alpha11 = x[16 * i + 10];
2113     alpha12 = x[16 * i + 11];
2114     alpha13 = x[16 * i + 12];
2115     alpha14 = x[16 * i + 13];
2116     alpha15 = x[16 * i + 14];
2117     alpha16 = x[16 * i + 15];
2118     while (n-- > 0) {
2119       y[16 * (*idx)] += alpha1 * (*v);
2120       y[16 * (*idx) + 1] += alpha2 * (*v);
2121       y[16 * (*idx) + 2] += alpha3 * (*v);
2122       y[16 * (*idx) + 3] += alpha4 * (*v);
2123       y[16 * (*idx) + 4] += alpha5 * (*v);
2124       y[16 * (*idx) + 5] += alpha6 * (*v);
2125       y[16 * (*idx) + 6] += alpha7 * (*v);
2126       y[16 * (*idx) + 7] += alpha8 * (*v);
2127       y[16 * (*idx) + 8] += alpha9 * (*v);
2128       y[16 * (*idx) + 9] += alpha10 * (*v);
2129       y[16 * (*idx) + 10] += alpha11 * (*v);
2130       y[16 * (*idx) + 11] += alpha12 * (*v);
2131       y[16 * (*idx) + 12] += alpha13 * (*v);
2132       y[16 * (*idx) + 13] += alpha14 * (*v);
2133       y[16 * (*idx) + 14] += alpha15 * (*v);
2134       y[16 * (*idx) + 15] += alpha16 * (*v);
2135       idx++;
2136       v++;
2137     }
2138   }
2139   PetscCall(PetscLogFlops(32.0 * a->nz));
2140   PetscCall(VecRestoreArrayRead(xx, &x));
2141   PetscCall(VecRestoreArray(yy, &y));
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
2146   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2147   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2148   const PetscScalar *x, *v;
2149   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2150   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2151   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2152   PetscInt           n, i, jrow, j;
2153 
2154   PetscFunctionBegin;
2155   if (yy != zz) PetscCall(VecCopy(yy, zz));
2156   PetscCall(VecGetArrayRead(xx, &x));
2157   PetscCall(VecGetArray(zz, &y));
2158   idx = a->j;
2159   v   = a->a;
2160   ii  = a->i;
2161 
2162   for (i = 0; i < m; i++) {
2163     jrow  = ii[i];
2164     n     = ii[i + 1] - jrow;
2165     sum1  = 0.0;
2166     sum2  = 0.0;
2167     sum3  = 0.0;
2168     sum4  = 0.0;
2169     sum5  = 0.0;
2170     sum6  = 0.0;
2171     sum7  = 0.0;
2172     sum8  = 0.0;
2173     sum9  = 0.0;
2174     sum10 = 0.0;
2175     sum11 = 0.0;
2176     sum12 = 0.0;
2177     sum13 = 0.0;
2178     sum14 = 0.0;
2179     sum15 = 0.0;
2180     sum16 = 0.0;
2181     for (j = 0; j < n; j++) {
2182       sum1 += v[jrow] * x[16 * idx[jrow]];
2183       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2184       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2185       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2186       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2187       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2188       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2189       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2190       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2191       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2192       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2193       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2194       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2195       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2196       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2197       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2198       jrow++;
2199     }
2200     y[16 * i] += sum1;
2201     y[16 * i + 1] += sum2;
2202     y[16 * i + 2] += sum3;
2203     y[16 * i + 3] += sum4;
2204     y[16 * i + 4] += sum5;
2205     y[16 * i + 5] += sum6;
2206     y[16 * i + 6] += sum7;
2207     y[16 * i + 7] += sum8;
2208     y[16 * i + 8] += sum9;
2209     y[16 * i + 9] += sum10;
2210     y[16 * i + 10] += sum11;
2211     y[16 * i + 11] += sum12;
2212     y[16 * i + 12] += sum13;
2213     y[16 * i + 13] += sum14;
2214     y[16 * i + 14] += sum15;
2215     y[16 * i + 15] += sum16;
2216   }
2217 
2218   PetscCall(PetscLogFlops(32.0 * a->nz));
2219   PetscCall(VecRestoreArrayRead(xx, &x));
2220   PetscCall(VecRestoreArray(zz, &y));
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
2225   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2226   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2227   const PetscScalar *x, *v;
2228   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2229   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2230   const PetscInt     m = b->AIJ->rmap->n, *idx;
2231   PetscInt           n, i;
2232 
2233   PetscFunctionBegin;
2234   if (yy != zz) PetscCall(VecCopy(yy, zz));
2235   PetscCall(VecGetArrayRead(xx, &x));
2236   PetscCall(VecGetArray(zz, &y));
2237   for (i = 0; i < m; i++) {
2238     idx     = a->j + a->i[i];
2239     v       = a->a + a->i[i];
2240     n       = a->i[i + 1] - a->i[i];
2241     alpha1  = x[16 * i];
2242     alpha2  = x[16 * i + 1];
2243     alpha3  = x[16 * i + 2];
2244     alpha4  = x[16 * i + 3];
2245     alpha5  = x[16 * i + 4];
2246     alpha6  = x[16 * i + 5];
2247     alpha7  = x[16 * i + 6];
2248     alpha8  = x[16 * i + 7];
2249     alpha9  = x[16 * i + 8];
2250     alpha10 = x[16 * i + 9];
2251     alpha11 = x[16 * i + 10];
2252     alpha12 = x[16 * i + 11];
2253     alpha13 = x[16 * i + 12];
2254     alpha14 = x[16 * i + 13];
2255     alpha15 = x[16 * i + 14];
2256     alpha16 = x[16 * i + 15];
2257     while (n-- > 0) {
2258       y[16 * (*idx)] += alpha1 * (*v);
2259       y[16 * (*idx) + 1] += alpha2 * (*v);
2260       y[16 * (*idx) + 2] += alpha3 * (*v);
2261       y[16 * (*idx) + 3] += alpha4 * (*v);
2262       y[16 * (*idx) + 4] += alpha5 * (*v);
2263       y[16 * (*idx) + 5] += alpha6 * (*v);
2264       y[16 * (*idx) + 6] += alpha7 * (*v);
2265       y[16 * (*idx) + 7] += alpha8 * (*v);
2266       y[16 * (*idx) + 8] += alpha9 * (*v);
2267       y[16 * (*idx) + 9] += alpha10 * (*v);
2268       y[16 * (*idx) + 10] += alpha11 * (*v);
2269       y[16 * (*idx) + 11] += alpha12 * (*v);
2270       y[16 * (*idx) + 12] += alpha13 * (*v);
2271       y[16 * (*idx) + 13] += alpha14 * (*v);
2272       y[16 * (*idx) + 14] += alpha15 * (*v);
2273       y[16 * (*idx) + 15] += alpha16 * (*v);
2274       idx++;
2275       v++;
2276     }
2277   }
2278   PetscCall(PetscLogFlops(32.0 * a->nz));
2279   PetscCall(VecRestoreArrayRead(xx, &x));
2280   PetscCall(VecRestoreArray(zz, &y));
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 /*--------------------------------------------------------------------------------------------*/
2285 PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2286   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2287   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2288   const PetscScalar *x, *v;
2289   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2290   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2291   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2292   PetscInt           nonzerorow = 0, n, i, jrow, j;
2293 
2294   PetscFunctionBegin;
2295   PetscCall(VecGetArrayRead(xx, &x));
2296   PetscCall(VecGetArray(yy, &y));
2297   idx = a->j;
2298   v   = a->a;
2299   ii  = a->i;
2300 
2301   for (i = 0; i < m; i++) {
2302     jrow  = ii[i];
2303     n     = ii[i + 1] - jrow;
2304     sum1  = 0.0;
2305     sum2  = 0.0;
2306     sum3  = 0.0;
2307     sum4  = 0.0;
2308     sum5  = 0.0;
2309     sum6  = 0.0;
2310     sum7  = 0.0;
2311     sum8  = 0.0;
2312     sum9  = 0.0;
2313     sum10 = 0.0;
2314     sum11 = 0.0;
2315     sum12 = 0.0;
2316     sum13 = 0.0;
2317     sum14 = 0.0;
2318     sum15 = 0.0;
2319     sum16 = 0.0;
2320     sum17 = 0.0;
2321     sum18 = 0.0;
2322 
2323     nonzerorow += (n > 0);
2324     for (j = 0; j < n; j++) {
2325       sum1 += v[jrow] * x[18 * idx[jrow]];
2326       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2327       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2328       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2329       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2330       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2331       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2332       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2333       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2334       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2335       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2336       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2337       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2338       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2339       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2340       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2341       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2342       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2343       jrow++;
2344     }
2345     y[18 * i]      = sum1;
2346     y[18 * i + 1]  = sum2;
2347     y[18 * i + 2]  = sum3;
2348     y[18 * i + 3]  = sum4;
2349     y[18 * i + 4]  = sum5;
2350     y[18 * i + 5]  = sum6;
2351     y[18 * i + 6]  = sum7;
2352     y[18 * i + 7]  = sum8;
2353     y[18 * i + 8]  = sum9;
2354     y[18 * i + 9]  = sum10;
2355     y[18 * i + 10] = sum11;
2356     y[18 * i + 11] = sum12;
2357     y[18 * i + 12] = sum13;
2358     y[18 * i + 13] = sum14;
2359     y[18 * i + 14] = sum15;
2360     y[18 * i + 15] = sum16;
2361     y[18 * i + 16] = sum17;
2362     y[18 * i + 17] = sum18;
2363   }
2364 
2365   PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow));
2366   PetscCall(VecRestoreArrayRead(xx, &x));
2367   PetscCall(VecRestoreArray(yy, &y));
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2372   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2373   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2374   const PetscScalar *x, *v;
2375   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2376   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2377   const PetscInt     m = b->AIJ->rmap->n, *idx;
2378   PetscInt           n, i;
2379 
2380   PetscFunctionBegin;
2381   PetscCall(VecSet(yy, 0.0));
2382   PetscCall(VecGetArrayRead(xx, &x));
2383   PetscCall(VecGetArray(yy, &y));
2384 
2385   for (i = 0; i < m; i++) {
2386     idx     = a->j + a->i[i];
2387     v       = a->a + a->i[i];
2388     n       = a->i[i + 1] - a->i[i];
2389     alpha1  = x[18 * i];
2390     alpha2  = x[18 * i + 1];
2391     alpha3  = x[18 * i + 2];
2392     alpha4  = x[18 * i + 3];
2393     alpha5  = x[18 * i + 4];
2394     alpha6  = x[18 * i + 5];
2395     alpha7  = x[18 * i + 6];
2396     alpha8  = x[18 * i + 7];
2397     alpha9  = x[18 * i + 8];
2398     alpha10 = x[18 * i + 9];
2399     alpha11 = x[18 * i + 10];
2400     alpha12 = x[18 * i + 11];
2401     alpha13 = x[18 * i + 12];
2402     alpha14 = x[18 * i + 13];
2403     alpha15 = x[18 * i + 14];
2404     alpha16 = x[18 * i + 15];
2405     alpha17 = x[18 * i + 16];
2406     alpha18 = x[18 * i + 17];
2407     while (n-- > 0) {
2408       y[18 * (*idx)] += alpha1 * (*v);
2409       y[18 * (*idx) + 1] += alpha2 * (*v);
2410       y[18 * (*idx) + 2] += alpha3 * (*v);
2411       y[18 * (*idx) + 3] += alpha4 * (*v);
2412       y[18 * (*idx) + 4] += alpha5 * (*v);
2413       y[18 * (*idx) + 5] += alpha6 * (*v);
2414       y[18 * (*idx) + 6] += alpha7 * (*v);
2415       y[18 * (*idx) + 7] += alpha8 * (*v);
2416       y[18 * (*idx) + 8] += alpha9 * (*v);
2417       y[18 * (*idx) + 9] += alpha10 * (*v);
2418       y[18 * (*idx) + 10] += alpha11 * (*v);
2419       y[18 * (*idx) + 11] += alpha12 * (*v);
2420       y[18 * (*idx) + 12] += alpha13 * (*v);
2421       y[18 * (*idx) + 13] += alpha14 * (*v);
2422       y[18 * (*idx) + 14] += alpha15 * (*v);
2423       y[18 * (*idx) + 15] += alpha16 * (*v);
2424       y[18 * (*idx) + 16] += alpha17 * (*v);
2425       y[18 * (*idx) + 17] += alpha18 * (*v);
2426       idx++;
2427       v++;
2428     }
2429   }
2430   PetscCall(PetscLogFlops(36.0 * a->nz));
2431   PetscCall(VecRestoreArrayRead(xx, &x));
2432   PetscCall(VecRestoreArray(yy, &y));
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2437   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2438   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2439   const PetscScalar *x, *v;
2440   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2441   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2442   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2443   PetscInt           n, i, jrow, j;
2444 
2445   PetscFunctionBegin;
2446   if (yy != zz) PetscCall(VecCopy(yy, zz));
2447   PetscCall(VecGetArrayRead(xx, &x));
2448   PetscCall(VecGetArray(zz, &y));
2449   idx = a->j;
2450   v   = a->a;
2451   ii  = a->i;
2452 
2453   for (i = 0; i < m; i++) {
2454     jrow  = ii[i];
2455     n     = ii[i + 1] - jrow;
2456     sum1  = 0.0;
2457     sum2  = 0.0;
2458     sum3  = 0.0;
2459     sum4  = 0.0;
2460     sum5  = 0.0;
2461     sum6  = 0.0;
2462     sum7  = 0.0;
2463     sum8  = 0.0;
2464     sum9  = 0.0;
2465     sum10 = 0.0;
2466     sum11 = 0.0;
2467     sum12 = 0.0;
2468     sum13 = 0.0;
2469     sum14 = 0.0;
2470     sum15 = 0.0;
2471     sum16 = 0.0;
2472     sum17 = 0.0;
2473     sum18 = 0.0;
2474     for (j = 0; j < n; j++) {
2475       sum1 += v[jrow] * x[18 * idx[jrow]];
2476       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2477       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2478       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2479       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2480       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2481       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2482       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2483       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2484       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2485       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2486       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2487       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2488       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2489       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2490       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2491       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2492       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2493       jrow++;
2494     }
2495     y[18 * i] += sum1;
2496     y[18 * i + 1] += sum2;
2497     y[18 * i + 2] += sum3;
2498     y[18 * i + 3] += sum4;
2499     y[18 * i + 4] += sum5;
2500     y[18 * i + 5] += sum6;
2501     y[18 * i + 6] += sum7;
2502     y[18 * i + 7] += sum8;
2503     y[18 * i + 8] += sum9;
2504     y[18 * i + 9] += sum10;
2505     y[18 * i + 10] += sum11;
2506     y[18 * i + 11] += sum12;
2507     y[18 * i + 12] += sum13;
2508     y[18 * i + 13] += sum14;
2509     y[18 * i + 14] += sum15;
2510     y[18 * i + 15] += sum16;
2511     y[18 * i + 16] += sum17;
2512     y[18 * i + 17] += sum18;
2513   }
2514 
2515   PetscCall(PetscLogFlops(36.0 * a->nz));
2516   PetscCall(VecRestoreArrayRead(xx, &x));
2517   PetscCall(VecRestoreArray(zz, &y));
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2522   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2523   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2524   const PetscScalar *x, *v;
2525   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2526   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2527   const PetscInt     m = b->AIJ->rmap->n, *idx;
2528   PetscInt           n, i;
2529 
2530   PetscFunctionBegin;
2531   if (yy != zz) PetscCall(VecCopy(yy, zz));
2532   PetscCall(VecGetArrayRead(xx, &x));
2533   PetscCall(VecGetArray(zz, &y));
2534   for (i = 0; i < m; i++) {
2535     idx     = a->j + a->i[i];
2536     v       = a->a + a->i[i];
2537     n       = a->i[i + 1] - a->i[i];
2538     alpha1  = x[18 * i];
2539     alpha2  = x[18 * i + 1];
2540     alpha3  = x[18 * i + 2];
2541     alpha4  = x[18 * i + 3];
2542     alpha5  = x[18 * i + 4];
2543     alpha6  = x[18 * i + 5];
2544     alpha7  = x[18 * i + 6];
2545     alpha8  = x[18 * i + 7];
2546     alpha9  = x[18 * i + 8];
2547     alpha10 = x[18 * i + 9];
2548     alpha11 = x[18 * i + 10];
2549     alpha12 = x[18 * i + 11];
2550     alpha13 = x[18 * i + 12];
2551     alpha14 = x[18 * i + 13];
2552     alpha15 = x[18 * i + 14];
2553     alpha16 = x[18 * i + 15];
2554     alpha17 = x[18 * i + 16];
2555     alpha18 = x[18 * i + 17];
2556     while (n-- > 0) {
2557       y[18 * (*idx)] += alpha1 * (*v);
2558       y[18 * (*idx) + 1] += alpha2 * (*v);
2559       y[18 * (*idx) + 2] += alpha3 * (*v);
2560       y[18 * (*idx) + 3] += alpha4 * (*v);
2561       y[18 * (*idx) + 4] += alpha5 * (*v);
2562       y[18 * (*idx) + 5] += alpha6 * (*v);
2563       y[18 * (*idx) + 6] += alpha7 * (*v);
2564       y[18 * (*idx) + 7] += alpha8 * (*v);
2565       y[18 * (*idx) + 8] += alpha9 * (*v);
2566       y[18 * (*idx) + 9] += alpha10 * (*v);
2567       y[18 * (*idx) + 10] += alpha11 * (*v);
2568       y[18 * (*idx) + 11] += alpha12 * (*v);
2569       y[18 * (*idx) + 12] += alpha13 * (*v);
2570       y[18 * (*idx) + 13] += alpha14 * (*v);
2571       y[18 * (*idx) + 14] += alpha15 * (*v);
2572       y[18 * (*idx) + 15] += alpha16 * (*v);
2573       y[18 * (*idx) + 16] += alpha17 * (*v);
2574       y[18 * (*idx) + 17] += alpha18 * (*v);
2575       idx++;
2576       v++;
2577     }
2578   }
2579   PetscCall(PetscLogFlops(36.0 * a->nz));
2580   PetscCall(VecRestoreArrayRead(xx, &x));
2581   PetscCall(VecRestoreArray(zz, &y));
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
2586   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2587   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2588   const PetscScalar *x, *v;
2589   PetscScalar       *y, *sums;
2590   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2591   PetscInt           n, i, jrow, j, dof = b->dof, k;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(VecGetArrayRead(xx, &x));
2595   PetscCall(VecSet(yy, 0.0));
2596   PetscCall(VecGetArray(yy, &y));
2597   idx = a->j;
2598   v   = a->a;
2599   ii  = a->i;
2600 
2601   for (i = 0; i < m; i++) {
2602     jrow = ii[i];
2603     n    = ii[i + 1] - jrow;
2604     sums = y + dof * i;
2605     for (j = 0; j < n; j++) {
2606       for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; }
2607       jrow++;
2608     }
2609   }
2610 
2611   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2612   PetscCall(VecRestoreArrayRead(xx, &x));
2613   PetscCall(VecRestoreArray(yy, &y));
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
2618   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2619   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2620   const PetscScalar *x, *v;
2621   PetscScalar       *y, *sums;
2622   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2623   PetscInt           n, i, jrow, j, dof = b->dof, k;
2624 
2625   PetscFunctionBegin;
2626   if (yy != zz) PetscCall(VecCopy(yy, zz));
2627   PetscCall(VecGetArrayRead(xx, &x));
2628   PetscCall(VecGetArray(zz, &y));
2629   idx = a->j;
2630   v   = a->a;
2631   ii  = a->i;
2632 
2633   for (i = 0; i < m; i++) {
2634     jrow = ii[i];
2635     n    = ii[i + 1] - jrow;
2636     sums = y + dof * i;
2637     for (j = 0; j < n; j++) {
2638       for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; }
2639       jrow++;
2640     }
2641   }
2642 
2643   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2644   PetscCall(VecRestoreArrayRead(xx, &x));
2645   PetscCall(VecRestoreArray(zz, &y));
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
2650   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2651   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2652   const PetscScalar *x, *v, *alpha;
2653   PetscScalar       *y;
2654   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2655   PetscInt           n, i, k;
2656 
2657   PetscFunctionBegin;
2658   PetscCall(VecGetArrayRead(xx, &x));
2659   PetscCall(VecSet(yy, 0.0));
2660   PetscCall(VecGetArray(yy, &y));
2661   for (i = 0; i < m; i++) {
2662     idx   = a->j + a->i[i];
2663     v     = a->a + a->i[i];
2664     n     = a->i[i + 1] - a->i[i];
2665     alpha = x + dof * i;
2666     while (n-- > 0) {
2667       for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); }
2668       idx++;
2669       v++;
2670     }
2671   }
2672   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2673   PetscCall(VecRestoreArrayRead(xx, &x));
2674   PetscCall(VecRestoreArray(yy, &y));
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
2679   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2680   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2681   const PetscScalar *x, *v, *alpha;
2682   PetscScalar       *y;
2683   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2684   PetscInt           n, i, k;
2685 
2686   PetscFunctionBegin;
2687   if (yy != zz) PetscCall(VecCopy(yy, zz));
2688   PetscCall(VecGetArrayRead(xx, &x));
2689   PetscCall(VecGetArray(zz, &y));
2690   for (i = 0; i < m; i++) {
2691     idx   = a->j + a->i[i];
2692     v     = a->a + a->i[i];
2693     n     = a->i[i + 1] - a->i[i];
2694     alpha = x + dof * i;
2695     while (n-- > 0) {
2696       for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); }
2697       idx++;
2698       v++;
2699     }
2700   }
2701   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2702   PetscCall(VecRestoreArrayRead(xx, &x));
2703   PetscCall(VecRestoreArray(zz, &y));
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 /*===================================================================================*/
2708 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
2709   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2710 
2711   PetscFunctionBegin;
2712   /* start the scatter */
2713   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2714   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
2715   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2716   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
2717   PetscFunctionReturn(0);
2718 }
2719 
2720 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
2721   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2722 
2723   PetscFunctionBegin;
2724   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
2725   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
2726   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
2727   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
2732   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2733 
2734   PetscFunctionBegin;
2735   /* start the scatter */
2736   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2737   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
2738   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2739   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
2744   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2745 
2746   PetscFunctionBegin;
2747   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
2748   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
2749   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
2750   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 /* ----------------------------------------------------------------*/
2755 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) {
2756   Mat_Product *product = C->product;
2757 
2758   PetscFunctionBegin;
2759   if (product->type == MATPRODUCT_PtAP) {
2760     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2761   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) {
2766   Mat_Product *product = C->product;
2767   PetscBool    flg     = PETSC_FALSE;
2768   Mat          A = product->A, P = product->B;
2769   PetscInt     alg = 1; /* set default algorithm */
2770 #if !defined(PETSC_HAVE_HYPRE)
2771   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
2772   PetscInt    nalg        = 4;
2773 #else
2774   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
2775   PetscInt    nalg        = 5;
2776 #endif
2777 
2778   PetscFunctionBegin;
2779   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
2780 
2781   /* PtAP */
2782   /* Check matrix local sizes */
2783   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2784              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
2785   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2786              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
2787 
2788   /* Set the default algorithm */
2789   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
2790   if (flg) { PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); }
2791 
2792   /* Get runtime option */
2793   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
2794   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
2795   if (flg) { PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); }
2796   PetscOptionsEnd();
2797 
2798   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
2799   if (flg) {
2800     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2801     PetscFunctionReturn(0);
2802   }
2803 
2804   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
2805   if (flg) {
2806     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2807     PetscFunctionReturn(0);
2808   }
2809 
2810   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2811   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
2812   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
2813   PetscCall(MatProductSetFromOptions(C));
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 /* ----------------------------------------------------------------*/
2818 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) {
2819   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2820   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
2821   Mat                P  = pp->AIJ;
2822   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2823   PetscInt          *pti, *ptj, *ptJ;
2824   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2825   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2826   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
2827   MatScalar         *ca;
2828   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
2829 
2830   PetscFunctionBegin;
2831   /* Get ij structure of P^T */
2832   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
2833 
2834   cn = pn * ppdof;
2835   /* Allocate ci array, arrays for fill computation and */
2836   /* free space for accumulating nonzero column info */
2837   PetscCall(PetscMalloc1(cn + 1, &ci));
2838   ci[0] = 0;
2839 
2840   /* Work arrays for rows of P^T*A */
2841   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
2842   PetscCall(PetscArrayzero(ptadenserow, an));
2843   PetscCall(PetscArrayzero(denserow, cn));
2844 
2845   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2846   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2847   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2848   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
2849   current_space = free_space;
2850 
2851   /* Determine symbolic info for each row of C: */
2852   for (i = 0; i < pn; i++) {
2853     ptnzi = pti[i + 1] - pti[i];
2854     ptJ   = ptj + pti[i];
2855     for (dof = 0; dof < ppdof; dof++) {
2856       ptanzi = 0;
2857       /* Determine symbolic row of PtA: */
2858       for (j = 0; j < ptnzi; j++) {
2859         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2860         arow = ptJ[j] * ppdof + dof;
2861         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2862         anzj = ai[arow + 1] - ai[arow];
2863         ajj  = aj + ai[arow];
2864         for (k = 0; k < anzj; k++) {
2865           if (!ptadenserow[ajj[k]]) {
2866             ptadenserow[ajj[k]]    = -1;
2867             ptasparserow[ptanzi++] = ajj[k];
2868           }
2869         }
2870       }
2871       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2872       ptaj = ptasparserow;
2873       cnzi = 0;
2874       for (j = 0; j < ptanzi; j++) {
2875         /* Get offset within block of P */
2876         pshift = *ptaj % ppdof;
2877         /* Get block row of P */
2878         prow   = (*ptaj++) / ppdof; /* integer division */
2879         /* P has same number of nonzeros per row as the compressed form */
2880         pnzj   = pi[prow + 1] - pi[prow];
2881         pjj    = pj + pi[prow];
2882         for (k = 0; k < pnzj; k++) {
2883           /* Locations in C are shifted by the offset within the block */
2884           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2885           if (!denserow[pjj[k] * ppdof + pshift]) {
2886             denserow[pjj[k] * ppdof + pshift] = -1;
2887             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
2888           }
2889         }
2890       }
2891 
2892       /* sort sparserow */
2893       PetscCall(PetscSortInt(cnzi, sparserow));
2894 
2895       /* If free space is not available, make more free space */
2896       /* Double the amount of total space in the list */
2897       if (current_space->local_remaining < cnzi) { PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space)); }
2898 
2899       /* Copy data into free space, and zero out denserows */
2900       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
2901 
2902       current_space->array += cnzi;
2903       current_space->local_used += cnzi;
2904       current_space->local_remaining -= cnzi;
2905 
2906       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2907       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
2908 
2909       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2910       /*        For now, we will recompute what is needed. */
2911       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
2912     }
2913   }
2914   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2915   /* Allocate space for cj, initialize cj, and */
2916   /* destroy list of free space and other temporary array(s) */
2917   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
2918   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
2919   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
2920 
2921   /* Allocate space for ca */
2922   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
2923 
2924   /* put together the new matrix */
2925   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
2926   PetscCall(MatSetBlockSize(C, pp->dof));
2927 
2928   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2929   /* Since these are PETSc arrays, change flags to free them as necessary. */
2930   c          = (Mat_SeqAIJ *)(C->data);
2931   c->free_a  = PETSC_TRUE;
2932   c->free_ij = PETSC_TRUE;
2933   c->nonew   = 0;
2934 
2935   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2936   C->ops->productnumeric = MatProductNumeric_PtAP;
2937 
2938   /* Clean up. */
2939   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) {
2944   /* This routine requires testing -- first draft only */
2945   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
2946   Mat              P  = pp->AIJ;
2947   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2948   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
2949   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
2950   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
2951   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
2952   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
2953   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
2954   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
2955   MatScalar       *ca = c->a, *caj, *apa;
2956 
2957   PetscFunctionBegin;
2958   /* Allocate temporary array for storage of one row of A*P */
2959   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
2960 
2961   /* Clear old values in C */
2962   PetscCall(PetscArrayzero(ca, ci[cm]));
2963 
2964   for (i = 0; i < am; i++) {
2965     /* Form sparse row of A*P */
2966     anzi  = ai[i + 1] - ai[i];
2967     apnzj = 0;
2968     for (j = 0; j < anzi; j++) {
2969       /* Get offset within block of P */
2970       pshift = *aj % ppdof;
2971       /* Get block row of P */
2972       prow   = *aj++ / ppdof; /* integer division */
2973       pnzj   = pi[prow + 1] - pi[prow];
2974       pjj    = pj + pi[prow];
2975       paj    = pa + pi[prow];
2976       for (k = 0; k < pnzj; k++) {
2977         poffset = pjj[k] * ppdof + pshift;
2978         if (!apjdense[poffset]) {
2979           apjdense[poffset] = -1;
2980           apj[apnzj++]      = poffset;
2981         }
2982         apa[poffset] += (*aa) * paj[k];
2983       }
2984       PetscCall(PetscLogFlops(2.0 * pnzj));
2985       aa++;
2986     }
2987 
2988     /* Sort the j index array for quick sparse axpy. */
2989     /* Note: a array does not need sorting as it is in dense storage locations. */
2990     PetscCall(PetscSortInt(apnzj, apj));
2991 
2992     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2993     prow    = i / ppdof; /* integer division */
2994     pshift  = i % ppdof;
2995     poffset = pi[prow];
2996     pnzi    = pi[prow + 1] - poffset;
2997     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2998     pJ      = pj + poffset;
2999     pA      = pa + poffset;
3000     for (j = 0; j < pnzi; j++) {
3001       crow = (*pJ) * ppdof + pshift;
3002       cjj  = cj + ci[crow];
3003       caj  = ca + ci[crow];
3004       pJ++;
3005       /* Perform sparse axpy operation.  Note cjj includes apj. */
3006       for (k = 0, nextap = 0; nextap < apnzj; k++) {
3007         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
3008       }
3009       PetscCall(PetscLogFlops(2.0 * apnzj));
3010       pA++;
3011     }
3012 
3013     /* Zero the current row info for A*P */
3014     for (j = 0; j < apnzj; j++) {
3015       apa[apj[j]]      = 0.;
3016       apjdense[apj[j]] = 0;
3017     }
3018   }
3019 
3020   /* Assemble the final matrix and clean up */
3021   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3022   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3023   PetscCall(PetscFree3(apa, apj, apjdense));
3024   PetscFunctionReturn(0);
3025 }
3026 
3027 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) {
3028   Mat_Product *product = C->product;
3029   Mat          A = product->A, P = product->B;
3030 
3031   PetscFunctionBegin;
3032   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) {
3037   PetscFunctionBegin;
3038   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3039 }
3040 
3041 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) {
3042   PetscFunctionBegin;
3043   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3044 }
3045 
3046 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3047 
3048 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) {
3049   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3050 
3051   PetscFunctionBegin;
3052 
3053   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3058 
3059 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) {
3060   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3061 
3062   PetscFunctionBegin;
3063   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
3064   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3065   PetscFunctionReturn(0);
3066 }
3067 
3068 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3069 
3070 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) {
3071   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3072 
3073   PetscFunctionBegin;
3074 
3075   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3080 
3081 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) {
3082   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3083 
3084   PetscFunctionBegin;
3085 
3086   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
3087   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) {
3092   Mat_Product *product = C->product;
3093   Mat          A = product->A, P = product->B;
3094   PetscBool    flg;
3095 
3096   PetscFunctionBegin;
3097   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
3098   if (flg) {
3099     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
3100     C->ops->productnumeric = MatProductNumeric_PtAP;
3101     PetscFunctionReturn(0);
3102   }
3103 
3104   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
3105   if (flg) {
3106     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
3107     C->ops->productnumeric = MatProductNumeric_PtAP;
3108     PetscFunctionReturn(0);
3109   }
3110 
3111   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3112 }
3113 
3114 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
3115   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3116   Mat          a   = b->AIJ, B;
3117   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
3118   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3119   PetscInt    *cols;
3120   PetscScalar *vals;
3121 
3122   PetscFunctionBegin;
3123   PetscCall(MatGetSize(a, &m, &n));
3124   PetscCall(PetscMalloc1(dof * m, &ilen));
3125   for (i = 0; i < m; i++) {
3126     nmax = PetscMax(nmax, aij->ilen[i]);
3127     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
3128   }
3129   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
3130   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
3131   PetscCall(MatSetType(B, newtype));
3132   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
3133   PetscCall(PetscFree(ilen));
3134   PetscCall(PetscMalloc1(nmax, &icols));
3135   ii = 0;
3136   for (i = 0; i < m; i++) {
3137     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
3138     for (j = 0; j < dof; j++) {
3139       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
3140       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
3141       ii++;
3142     }
3143     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
3144   }
3145   PetscCall(PetscFree(icols));
3146   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3147   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3148 
3149   if (reuse == MAT_INPLACE_MATRIX) {
3150     PetscCall(MatHeaderReplace(A, &B));
3151   } else {
3152     *newmat = B;
3153   }
3154   PetscFunctionReturn(0);
3155 }
3156 
3157 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3158 
3159 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
3160   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3161   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
3162   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
3163   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
3164   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
3165   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
3166   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
3167   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
3168   PetscInt     rstart, cstart, *garray, ii, k;
3169   PetscScalar *vals, *ovals;
3170 
3171   PetscFunctionBegin;
3172   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
3173   for (i = 0; i < A->rmap->n / dof; i++) {
3174     nmax  = PetscMax(nmax, AIJ->ilen[i]);
3175     onmax = PetscMax(onmax, OAIJ->ilen[i]);
3176     for (j = 0; j < dof; j++) {
3177       dnz[dof * i + j] = AIJ->ilen[i];
3178       onz[dof * i + j] = OAIJ->ilen[i];
3179     }
3180   }
3181   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3182   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3183   PetscCall(MatSetType(B, newtype));
3184   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
3185   PetscCall(MatSetBlockSize(B, dof));
3186   PetscCall(PetscFree2(dnz, onz));
3187 
3188   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
3189   rstart = dof * maij->A->rmap->rstart;
3190   cstart = dof * maij->A->cmap->rstart;
3191   garray = mpiaij->garray;
3192 
3193   ii = rstart;
3194   for (i = 0; i < A->rmap->n / dof; i++) {
3195     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
3196     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
3197     for (j = 0; j < dof; j++) {
3198       for (k = 0; k < ncols; k++) { icols[k] = cstart + dof * cols[k] + j; }
3199       for (k = 0; k < oncols; k++) { oicols[k] = dof * garray[ocols[k]] + j; }
3200       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
3201       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
3202       ii++;
3203     }
3204     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
3205     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
3206   }
3207   PetscCall(PetscFree2(icols, oicols));
3208 
3209   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3210   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3211 
3212   if (reuse == MAT_INPLACE_MATRIX) {
3213     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3214     ((PetscObject)A)->refct = 1;
3215 
3216     PetscCall(MatHeaderReplace(A, &B));
3217 
3218     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3219   } else {
3220     *newmat = B;
3221   }
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
3226   Mat A;
3227 
3228   PetscFunctionBegin;
3229   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
3230   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
3231   PetscCall(MatDestroy(&A));
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) {
3236   Mat A;
3237 
3238   PetscFunctionBegin;
3239   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
3240   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
3241   PetscCall(MatDestroy(&A));
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 /* ---------------------------------------------------------------------------------- */
3246 /*@
3247   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3248   operations for multicomponent problems.  It interpolates each component the same
3249   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3250   and MATMPIAIJ for distributed matrices.
3251 
3252   Collective
3253 
3254   Input Parameters:
3255 + A - the AIJ matrix describing the action on blocks
3256 - dof - the block size (number of components per node)
3257 
3258   Output Parameter:
3259 . maij - the new MAIJ matrix
3260 
3261   Operations provided:
3262 .vb
3263     MatMult
3264     MatMultTranspose
3265     MatMultAdd
3266     MatMultTransposeAdd
3267     MatView
3268 .ve
3269 
3270   Level: advanced
3271 
3272 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3273 @*/
3274 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) {
3275   PetscInt  n;
3276   Mat       B;
3277   PetscBool flg;
3278 #if defined(PETSC_HAVE_CUDA)
3279   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3280   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3281 #endif
3282 
3283   PetscFunctionBegin;
3284   dof = PetscAbs(dof);
3285   PetscCall(PetscObjectReference((PetscObject)A));
3286 
3287   if (dof == 1) *maij = A;
3288   else {
3289     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3290     /* propagate vec type */
3291     PetscCall(MatSetVecType(B, A->defaultvectype));
3292     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
3293     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
3294     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
3295     PetscCall(PetscLayoutSetUp(B->rmap));
3296     PetscCall(PetscLayoutSetUp(B->cmap));
3297 
3298     B->assembled = PETSC_TRUE;
3299 
3300     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
3301     if (flg) {
3302       Mat_SeqMAIJ *b;
3303 
3304       PetscCall(MatSetType(B, MATSEQMAIJ));
3305 
3306       B->ops->setup   = NULL;
3307       B->ops->destroy = MatDestroy_SeqMAIJ;
3308       B->ops->view    = MatView_SeqMAIJ;
3309 
3310       b      = (Mat_SeqMAIJ *)B->data;
3311       b->dof = dof;
3312       b->AIJ = A;
3313 
3314       if (dof == 2) {
3315         B->ops->mult             = MatMult_SeqMAIJ_2;
3316         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3317         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3318         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3319       } else if (dof == 3) {
3320         B->ops->mult             = MatMult_SeqMAIJ_3;
3321         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3322         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3323         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3324       } else if (dof == 4) {
3325         B->ops->mult             = MatMult_SeqMAIJ_4;
3326         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3327         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3328         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3329       } else if (dof == 5) {
3330         B->ops->mult             = MatMult_SeqMAIJ_5;
3331         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3332         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3333         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3334       } else if (dof == 6) {
3335         B->ops->mult             = MatMult_SeqMAIJ_6;
3336         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3337         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3338         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3339       } else if (dof == 7) {
3340         B->ops->mult             = MatMult_SeqMAIJ_7;
3341         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3342         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3343         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3344       } else if (dof == 8) {
3345         B->ops->mult             = MatMult_SeqMAIJ_8;
3346         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3347         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3348         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3349       } else if (dof == 9) {
3350         B->ops->mult             = MatMult_SeqMAIJ_9;
3351         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3352         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3353         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3354       } else if (dof == 10) {
3355         B->ops->mult             = MatMult_SeqMAIJ_10;
3356         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3357         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3358         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3359       } else if (dof == 11) {
3360         B->ops->mult             = MatMult_SeqMAIJ_11;
3361         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3362         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3363         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3364       } else if (dof == 16) {
3365         B->ops->mult             = MatMult_SeqMAIJ_16;
3366         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3367         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3368         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3369       } else if (dof == 18) {
3370         B->ops->mult             = MatMult_SeqMAIJ_18;
3371         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3372         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3373         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3374       } else {
3375         B->ops->mult             = MatMult_SeqMAIJ_N;
3376         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3377         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3378         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3379       }
3380 #if defined(PETSC_HAVE_CUDA)
3381       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
3382 #endif
3383       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
3384       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3385     } else {
3386       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3387       Mat_MPIMAIJ *b;
3388       IS           from, to;
3389       Vec          gvec;
3390 
3391       PetscCall(MatSetType(B, MATMPIMAIJ));
3392 
3393       B->ops->setup   = NULL;
3394       B->ops->destroy = MatDestroy_MPIMAIJ;
3395       B->ops->view    = MatView_MPIMAIJ;
3396 
3397       b      = (Mat_MPIMAIJ *)B->data;
3398       b->dof = dof;
3399       b->A   = A;
3400 
3401       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
3402       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
3403 
3404       PetscCall(VecGetSize(mpiaij->lvec, &n));
3405       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
3406       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
3407       PetscCall(VecSetBlockSize(b->w, dof));
3408       PetscCall(VecSetType(b->w, VECSEQ));
3409 
3410       /* create two temporary Index sets for build scatter gather */
3411       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
3412       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
3413 
3414       /* create temporary global vector to generate scatter context */
3415       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
3416 
3417       /* generate the scatter context */
3418       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
3419 
3420       PetscCall(ISDestroy(&from));
3421       PetscCall(ISDestroy(&to));
3422       PetscCall(VecDestroy(&gvec));
3423 
3424       B->ops->mult             = MatMult_MPIMAIJ_dof;
3425       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3426       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3427       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3428 
3429 #if defined(PETSC_HAVE_CUDA)
3430       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
3431 #endif
3432       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
3433       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3434     }
3435     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3436     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3437     PetscCall(MatSetUp(B));
3438 #if defined(PETSC_HAVE_CUDA)
3439     /* temporary until we have CUDA implementation of MAIJ */
3440     {
3441       PetscBool flg;
3442       if (convert) {
3443         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
3444         if (flg) { PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); }
3445       }
3446     }
3447 #endif
3448     *maij = B;
3449   }
3450   PetscFunctionReturn(0);
3451 }
3452