xref: /petsc/src/mat/impls/maij/maij.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/utils/freespace.h>
4 
5 /*@
6    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7 
8    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9 
10    Input Parameter:
11 .  A - the `MATMAIJ` matrix
12 
13    Output Parameter:
14 .  B - the `MATAIJ` matrix
15 
16    Level: advanced
17 
18    Note:
19     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20 
21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22 @*/
23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) {
24   PetscBool ismpimaij, isseqmaij;
25 
26   PetscFunctionBegin;
27   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
28   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29   if (ismpimaij) {
30     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
31 
32     *B = b->A;
33   } else if (isseqmaij) {
34     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
35 
36     *B = b->AIJ;
37   } else {
38     *B = A;
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 /*@
44    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
45 
46    Logically Collective
47 
48    Input Parameters:
49 +  A - the `MATMAIJ` matrix
50 -  dof - the block size for the new matrix
51 
52    Output Parameter:
53 .  B - the new `MATMAIJ` matrix
54 
55    Level: advanced
56 
57 .seealso: `MATMAIJ`, `MatCreateMAIJ()`
58 @*/
59 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) {
60   Mat Aij = NULL;
61 
62   PetscFunctionBegin;
63   PetscValidLogicalCollectiveInt(A, dof, 2);
64   PetscCall(MatMAIJGetAIJ(A, &Aij));
65   PetscCall(MatCreateMAIJ(Aij, dof, B));
66   PetscFunctionReturn(0);
67 }
68 
69 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) {
70   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
71 
72   PetscFunctionBegin;
73   PetscCall(MatDestroy(&b->AIJ));
74   PetscCall(PetscFree(A->data));
75   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
78   PetscFunctionReturn(0);
79 }
80 
81 PetscErrorCode MatSetUp_MAIJ(Mat A) {
82   PetscFunctionBegin;
83   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
84 }
85 
86 PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) {
87   Mat B;
88 
89   PetscFunctionBegin;
90   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
91   PetscCall(MatView(B, viewer));
92   PetscCall(MatDestroy(&B));
93   PetscFunctionReturn(0);
94 }
95 
96 PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) {
97   Mat B;
98 
99   PetscFunctionBegin;
100   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
101   PetscCall(MatView(B, viewer));
102   PetscCall(MatDestroy(&B));
103   PetscFunctionReturn(0);
104 }
105 
106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) {
107   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
108 
109   PetscFunctionBegin;
110   PetscCall(MatDestroy(&b->AIJ));
111   PetscCall(MatDestroy(&b->OAIJ));
112   PetscCall(MatDestroy(&b->A));
113   PetscCall(VecScatterDestroy(&b->ctx));
114   PetscCall(VecDestroy(&b->w));
115   PetscCall(PetscFree(A->data));
116   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
117   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
118   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
119   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
120   PetscFunctionReturn(0);
121 }
122 
123 /*MC
124   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
125   multicomponent problems, interpolating or restricting each component the same way independently.
126   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
127 
128   Operations provided:
129 .vb
130     MatMult()
131     MatMultTranspose()
132     MatMultAdd()
133     MatMultTransposeAdd()
134 .ve
135 
136   Level: advanced
137 
138 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
139 M*/
140 
141 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) {
142   Mat_MPIMAIJ *b;
143   PetscMPIInt  size;
144 
145   PetscFunctionBegin;
146   PetscCall(PetscNewLog(A, &b));
147   A->data = (void *)b;
148 
149   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
150 
151   A->ops->setup = MatSetUp_MAIJ;
152 
153   b->AIJ  = NULL;
154   b->dof  = 0;
155   b->OAIJ = NULL;
156   b->ctx  = NULL;
157   b->w    = NULL;
158   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
159   if (size == 1) {
160     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
161   } else {
162     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
163   }
164   A->preallocated = PETSC_TRUE;
165   A->assembled    = PETSC_TRUE;
166   PetscFunctionReturn(0);
167 }
168 
169 /* --------------------------------------------------------------------------------------*/
170 PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
171   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
172   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
173   const PetscScalar *x, *v;
174   PetscScalar       *y, sum1, sum2;
175   PetscInt           nonzerorow = 0, n, i, jrow, j;
176   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
177 
178   PetscFunctionBegin;
179   PetscCall(VecGetArrayRead(xx, &x));
180   PetscCall(VecGetArray(yy, &y));
181   idx = a->j;
182   v   = a->a;
183   ii  = a->i;
184 
185   for (i = 0; i < m; i++) {
186     jrow = ii[i];
187     n    = ii[i + 1] - jrow;
188     sum1 = 0.0;
189     sum2 = 0.0;
190 
191     nonzerorow += (n > 0);
192     for (j = 0; j < n; j++) {
193       sum1 += v[jrow] * x[2 * idx[jrow]];
194       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
195       jrow++;
196     }
197     y[2 * i]     = sum1;
198     y[2 * i + 1] = sum2;
199   }
200 
201   PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow));
202   PetscCall(VecRestoreArrayRead(xx, &x));
203   PetscCall(VecRestoreArray(yy, &y));
204   PetscFunctionReturn(0);
205 }
206 
207 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
208   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
209   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
210   const PetscScalar *x, *v;
211   PetscScalar       *y, alpha1, alpha2;
212   PetscInt           n, i;
213   const PetscInt     m = b->AIJ->rmap->n, *idx;
214 
215   PetscFunctionBegin;
216   PetscCall(VecSet(yy, 0.0));
217   PetscCall(VecGetArrayRead(xx, &x));
218   PetscCall(VecGetArray(yy, &y));
219 
220   for (i = 0; i < m; i++) {
221     idx    = a->j + a->i[i];
222     v      = a->a + a->i[i];
223     n      = a->i[i + 1] - a->i[i];
224     alpha1 = x[2 * i];
225     alpha2 = x[2 * i + 1];
226     while (n-- > 0) {
227       y[2 * (*idx)] += alpha1 * (*v);
228       y[2 * (*idx) + 1] += alpha2 * (*v);
229       idx++;
230       v++;
231     }
232   }
233   PetscCall(PetscLogFlops(4.0 * a->nz));
234   PetscCall(VecRestoreArrayRead(xx, &x));
235   PetscCall(VecRestoreArray(yy, &y));
236   PetscFunctionReturn(0);
237 }
238 
239 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
240   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
241   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
242   const PetscScalar *x, *v;
243   PetscScalar       *y, sum1, sum2;
244   PetscInt           n, i, jrow, j;
245   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
246 
247   PetscFunctionBegin;
248   if (yy != zz) PetscCall(VecCopy(yy, zz));
249   PetscCall(VecGetArrayRead(xx, &x));
250   PetscCall(VecGetArray(zz, &y));
251   idx = a->j;
252   v   = a->a;
253   ii  = a->i;
254 
255   for (i = 0; i < m; i++) {
256     jrow = ii[i];
257     n    = ii[i + 1] - jrow;
258     sum1 = 0.0;
259     sum2 = 0.0;
260     for (j = 0; j < n; j++) {
261       sum1 += v[jrow] * x[2 * idx[jrow]];
262       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
263       jrow++;
264     }
265     y[2 * i] += sum1;
266     y[2 * i + 1] += sum2;
267   }
268 
269   PetscCall(PetscLogFlops(4.0 * a->nz));
270   PetscCall(VecRestoreArrayRead(xx, &x));
271   PetscCall(VecRestoreArray(zz, &y));
272   PetscFunctionReturn(0);
273 }
274 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
275   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
276   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
277   const PetscScalar *x, *v;
278   PetscScalar       *y, alpha1, alpha2;
279   PetscInt           n, i;
280   const PetscInt     m = b->AIJ->rmap->n, *idx;
281 
282   PetscFunctionBegin;
283   if (yy != zz) PetscCall(VecCopy(yy, zz));
284   PetscCall(VecGetArrayRead(xx, &x));
285   PetscCall(VecGetArray(zz, &y));
286 
287   for (i = 0; i < m; i++) {
288     idx    = a->j + a->i[i];
289     v      = a->a + a->i[i];
290     n      = a->i[i + 1] - a->i[i];
291     alpha1 = x[2 * i];
292     alpha2 = x[2 * i + 1];
293     while (n-- > 0) {
294       y[2 * (*idx)] += alpha1 * (*v);
295       y[2 * (*idx) + 1] += alpha2 * (*v);
296       idx++;
297       v++;
298     }
299   }
300   PetscCall(PetscLogFlops(4.0 * a->nz));
301   PetscCall(VecRestoreArrayRead(xx, &x));
302   PetscCall(VecRestoreArray(zz, &y));
303   PetscFunctionReturn(0);
304 }
305 /* --------------------------------------------------------------------------------------*/
306 PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
307   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
308   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
309   const PetscScalar *x, *v;
310   PetscScalar       *y, sum1, sum2, sum3;
311   PetscInt           nonzerorow = 0, n, i, jrow, j;
312   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
313 
314   PetscFunctionBegin;
315   PetscCall(VecGetArrayRead(xx, &x));
316   PetscCall(VecGetArray(yy, &y));
317   idx = a->j;
318   v   = a->a;
319   ii  = a->i;
320 
321   for (i = 0; i < m; i++) {
322     jrow = ii[i];
323     n    = ii[i + 1] - jrow;
324     sum1 = 0.0;
325     sum2 = 0.0;
326     sum3 = 0.0;
327 
328     nonzerorow += (n > 0);
329     for (j = 0; j < n; j++) {
330       sum1 += v[jrow] * x[3 * idx[jrow]];
331       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
332       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
333       jrow++;
334     }
335     y[3 * i]     = sum1;
336     y[3 * i + 1] = sum2;
337     y[3 * i + 2] = sum3;
338   }
339 
340   PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow));
341   PetscCall(VecRestoreArrayRead(xx, &x));
342   PetscCall(VecRestoreArray(yy, &y));
343   PetscFunctionReturn(0);
344 }
345 
346 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
347   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
348   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
349   const PetscScalar *x, *v;
350   PetscScalar       *y, alpha1, alpha2, alpha3;
351   PetscInt           n, i;
352   const PetscInt     m = b->AIJ->rmap->n, *idx;
353 
354   PetscFunctionBegin;
355   PetscCall(VecSet(yy, 0.0));
356   PetscCall(VecGetArrayRead(xx, &x));
357   PetscCall(VecGetArray(yy, &y));
358 
359   for (i = 0; i < m; i++) {
360     idx    = a->j + a->i[i];
361     v      = a->a + a->i[i];
362     n      = a->i[i + 1] - a->i[i];
363     alpha1 = x[3 * i];
364     alpha2 = x[3 * i + 1];
365     alpha3 = x[3 * i + 2];
366     while (n-- > 0) {
367       y[3 * (*idx)] += alpha1 * (*v);
368       y[3 * (*idx) + 1] += alpha2 * (*v);
369       y[3 * (*idx) + 2] += alpha3 * (*v);
370       idx++;
371       v++;
372     }
373   }
374   PetscCall(PetscLogFlops(6.0 * a->nz));
375   PetscCall(VecRestoreArrayRead(xx, &x));
376   PetscCall(VecRestoreArray(yy, &y));
377   PetscFunctionReturn(0);
378 }
379 
380 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
381   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
382   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
383   const PetscScalar *x, *v;
384   PetscScalar       *y, sum1, sum2, sum3;
385   PetscInt           n, i, jrow, j;
386   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
387 
388   PetscFunctionBegin;
389   if (yy != zz) PetscCall(VecCopy(yy, zz));
390   PetscCall(VecGetArrayRead(xx, &x));
391   PetscCall(VecGetArray(zz, &y));
392   idx = a->j;
393   v   = a->a;
394   ii  = a->i;
395 
396   for (i = 0; i < m; i++) {
397     jrow = ii[i];
398     n    = ii[i + 1] - jrow;
399     sum1 = 0.0;
400     sum2 = 0.0;
401     sum3 = 0.0;
402     for (j = 0; j < n; j++) {
403       sum1 += v[jrow] * x[3 * idx[jrow]];
404       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
405       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
406       jrow++;
407     }
408     y[3 * i] += sum1;
409     y[3 * i + 1] += sum2;
410     y[3 * i + 2] += sum3;
411   }
412 
413   PetscCall(PetscLogFlops(6.0 * a->nz));
414   PetscCall(VecRestoreArrayRead(xx, &x));
415   PetscCall(VecRestoreArray(zz, &y));
416   PetscFunctionReturn(0);
417 }
418 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
419   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
420   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
421   const PetscScalar *x, *v;
422   PetscScalar       *y, alpha1, alpha2, alpha3;
423   PetscInt           n, i;
424   const PetscInt     m = b->AIJ->rmap->n, *idx;
425 
426   PetscFunctionBegin;
427   if (yy != zz) PetscCall(VecCopy(yy, zz));
428   PetscCall(VecGetArrayRead(xx, &x));
429   PetscCall(VecGetArray(zz, &y));
430   for (i = 0; i < m; i++) {
431     idx    = a->j + a->i[i];
432     v      = a->a + a->i[i];
433     n      = a->i[i + 1] - a->i[i];
434     alpha1 = x[3 * i];
435     alpha2 = x[3 * i + 1];
436     alpha3 = x[3 * i + 2];
437     while (n-- > 0) {
438       y[3 * (*idx)] += alpha1 * (*v);
439       y[3 * (*idx) + 1] += alpha2 * (*v);
440       y[3 * (*idx) + 2] += alpha3 * (*v);
441       idx++;
442       v++;
443     }
444   }
445   PetscCall(PetscLogFlops(6.0 * a->nz));
446   PetscCall(VecRestoreArrayRead(xx, &x));
447   PetscCall(VecRestoreArray(zz, &y));
448   PetscFunctionReturn(0);
449 }
450 
451 /* ------------------------------------------------------------------------------*/
452 PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
453   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
454   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
455   const PetscScalar *x, *v;
456   PetscScalar       *y, sum1, sum2, sum3, sum4;
457   PetscInt           nonzerorow = 0, n, i, jrow, j;
458   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
459 
460   PetscFunctionBegin;
461   PetscCall(VecGetArrayRead(xx, &x));
462   PetscCall(VecGetArray(yy, &y));
463   idx = a->j;
464   v   = a->a;
465   ii  = a->i;
466 
467   for (i = 0; i < m; i++) {
468     jrow = ii[i];
469     n    = ii[i + 1] - jrow;
470     sum1 = 0.0;
471     sum2 = 0.0;
472     sum3 = 0.0;
473     sum4 = 0.0;
474     nonzerorow += (n > 0);
475     for (j = 0; j < n; j++) {
476       sum1 += v[jrow] * x[4 * idx[jrow]];
477       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
478       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
479       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
480       jrow++;
481     }
482     y[4 * i]     = sum1;
483     y[4 * i + 1] = sum2;
484     y[4 * i + 2] = sum3;
485     y[4 * i + 3] = sum4;
486   }
487 
488   PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow));
489   PetscCall(VecRestoreArrayRead(xx, &x));
490   PetscCall(VecRestoreArray(yy, &y));
491   PetscFunctionReturn(0);
492 }
493 
494 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
495   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
496   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
497   const PetscScalar *x, *v;
498   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
499   PetscInt           n, i;
500   const PetscInt     m = b->AIJ->rmap->n, *idx;
501 
502   PetscFunctionBegin;
503   PetscCall(VecSet(yy, 0.0));
504   PetscCall(VecGetArrayRead(xx, &x));
505   PetscCall(VecGetArray(yy, &y));
506   for (i = 0; i < m; i++) {
507     idx    = a->j + a->i[i];
508     v      = a->a + a->i[i];
509     n      = a->i[i + 1] - a->i[i];
510     alpha1 = x[4 * i];
511     alpha2 = x[4 * i + 1];
512     alpha3 = x[4 * i + 2];
513     alpha4 = x[4 * i + 3];
514     while (n-- > 0) {
515       y[4 * (*idx)] += alpha1 * (*v);
516       y[4 * (*idx) + 1] += alpha2 * (*v);
517       y[4 * (*idx) + 2] += alpha3 * (*v);
518       y[4 * (*idx) + 3] += alpha4 * (*v);
519       idx++;
520       v++;
521     }
522   }
523   PetscCall(PetscLogFlops(8.0 * a->nz));
524   PetscCall(VecRestoreArrayRead(xx, &x));
525   PetscCall(VecRestoreArray(yy, &y));
526   PetscFunctionReturn(0);
527 }
528 
529 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
530   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
531   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
532   const PetscScalar *x, *v;
533   PetscScalar       *y, sum1, sum2, sum3, sum4;
534   PetscInt           n, i, jrow, j;
535   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
536 
537   PetscFunctionBegin;
538   if (yy != zz) PetscCall(VecCopy(yy, zz));
539   PetscCall(VecGetArrayRead(xx, &x));
540   PetscCall(VecGetArray(zz, &y));
541   idx = a->j;
542   v   = a->a;
543   ii  = a->i;
544 
545   for (i = 0; i < m; i++) {
546     jrow = ii[i];
547     n    = ii[i + 1] - jrow;
548     sum1 = 0.0;
549     sum2 = 0.0;
550     sum3 = 0.0;
551     sum4 = 0.0;
552     for (j = 0; j < n; j++) {
553       sum1 += v[jrow] * x[4 * idx[jrow]];
554       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
555       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
556       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
557       jrow++;
558     }
559     y[4 * i] += sum1;
560     y[4 * i + 1] += sum2;
561     y[4 * i + 2] += sum3;
562     y[4 * i + 3] += sum4;
563   }
564 
565   PetscCall(PetscLogFlops(8.0 * a->nz));
566   PetscCall(VecRestoreArrayRead(xx, &x));
567   PetscCall(VecRestoreArray(zz, &y));
568   PetscFunctionReturn(0);
569 }
570 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
571   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
572   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
573   const PetscScalar *x, *v;
574   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
575   PetscInt           n, i;
576   const PetscInt     m = b->AIJ->rmap->n, *idx;
577 
578   PetscFunctionBegin;
579   if (yy != zz) PetscCall(VecCopy(yy, zz));
580   PetscCall(VecGetArrayRead(xx, &x));
581   PetscCall(VecGetArray(zz, &y));
582 
583   for (i = 0; i < m; i++) {
584     idx    = a->j + a->i[i];
585     v      = a->a + a->i[i];
586     n      = a->i[i + 1] - a->i[i];
587     alpha1 = x[4 * i];
588     alpha2 = x[4 * i + 1];
589     alpha3 = x[4 * i + 2];
590     alpha4 = x[4 * i + 3];
591     while (n-- > 0) {
592       y[4 * (*idx)] += alpha1 * (*v);
593       y[4 * (*idx) + 1] += alpha2 * (*v);
594       y[4 * (*idx) + 2] += alpha3 * (*v);
595       y[4 * (*idx) + 3] += alpha4 * (*v);
596       idx++;
597       v++;
598     }
599   }
600   PetscCall(PetscLogFlops(8.0 * a->nz));
601   PetscCall(VecRestoreArrayRead(xx, &x));
602   PetscCall(VecRestoreArray(zz, &y));
603   PetscFunctionReturn(0);
604 }
605 /* ------------------------------------------------------------------------------*/
606 
607 PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
608   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
609   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
610   const PetscScalar *x, *v;
611   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
612   PetscInt           nonzerorow = 0, n, i, jrow, j;
613   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
614 
615   PetscFunctionBegin;
616   PetscCall(VecGetArrayRead(xx, &x));
617   PetscCall(VecGetArray(yy, &y));
618   idx = a->j;
619   v   = a->a;
620   ii  = a->i;
621 
622   for (i = 0; i < m; i++) {
623     jrow = ii[i];
624     n    = ii[i + 1] - jrow;
625     sum1 = 0.0;
626     sum2 = 0.0;
627     sum3 = 0.0;
628     sum4 = 0.0;
629     sum5 = 0.0;
630 
631     nonzerorow += (n > 0);
632     for (j = 0; j < n; j++) {
633       sum1 += v[jrow] * x[5 * idx[jrow]];
634       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
635       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
636       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
637       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
638       jrow++;
639     }
640     y[5 * i]     = sum1;
641     y[5 * i + 1] = sum2;
642     y[5 * i + 2] = sum3;
643     y[5 * i + 3] = sum4;
644     y[5 * i + 4] = sum5;
645   }
646 
647   PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow));
648   PetscCall(VecRestoreArrayRead(xx, &x));
649   PetscCall(VecRestoreArray(yy, &y));
650   PetscFunctionReturn(0);
651 }
652 
653 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
654   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
655   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
656   const PetscScalar *x, *v;
657   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
658   PetscInt           n, i;
659   const PetscInt     m = b->AIJ->rmap->n, *idx;
660 
661   PetscFunctionBegin;
662   PetscCall(VecSet(yy, 0.0));
663   PetscCall(VecGetArrayRead(xx, &x));
664   PetscCall(VecGetArray(yy, &y));
665 
666   for (i = 0; i < m; i++) {
667     idx    = a->j + a->i[i];
668     v      = a->a + a->i[i];
669     n      = a->i[i + 1] - a->i[i];
670     alpha1 = x[5 * i];
671     alpha2 = x[5 * i + 1];
672     alpha3 = x[5 * i + 2];
673     alpha4 = x[5 * i + 3];
674     alpha5 = x[5 * i + 4];
675     while (n-- > 0) {
676       y[5 * (*idx)] += alpha1 * (*v);
677       y[5 * (*idx) + 1] += alpha2 * (*v);
678       y[5 * (*idx) + 2] += alpha3 * (*v);
679       y[5 * (*idx) + 3] += alpha4 * (*v);
680       y[5 * (*idx) + 4] += alpha5 * (*v);
681       idx++;
682       v++;
683     }
684   }
685   PetscCall(PetscLogFlops(10.0 * a->nz));
686   PetscCall(VecRestoreArrayRead(xx, &x));
687   PetscCall(VecRestoreArray(yy, &y));
688   PetscFunctionReturn(0);
689 }
690 
691 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
692   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
693   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
694   const PetscScalar *x, *v;
695   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
696   PetscInt           n, i, jrow, j;
697   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
698 
699   PetscFunctionBegin;
700   if (yy != zz) PetscCall(VecCopy(yy, zz));
701   PetscCall(VecGetArrayRead(xx, &x));
702   PetscCall(VecGetArray(zz, &y));
703   idx = a->j;
704   v   = a->a;
705   ii  = a->i;
706 
707   for (i = 0; i < m; i++) {
708     jrow = ii[i];
709     n    = ii[i + 1] - jrow;
710     sum1 = 0.0;
711     sum2 = 0.0;
712     sum3 = 0.0;
713     sum4 = 0.0;
714     sum5 = 0.0;
715     for (j = 0; j < n; j++) {
716       sum1 += v[jrow] * x[5 * idx[jrow]];
717       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
718       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
719       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
720       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
721       jrow++;
722     }
723     y[5 * i] += sum1;
724     y[5 * i + 1] += sum2;
725     y[5 * i + 2] += sum3;
726     y[5 * i + 3] += sum4;
727     y[5 * i + 4] += sum5;
728   }
729 
730   PetscCall(PetscLogFlops(10.0 * a->nz));
731   PetscCall(VecRestoreArrayRead(xx, &x));
732   PetscCall(VecRestoreArray(zz, &y));
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
737   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
738   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
739   const PetscScalar *x, *v;
740   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
741   PetscInt           n, i;
742   const PetscInt     m = b->AIJ->rmap->n, *idx;
743 
744   PetscFunctionBegin;
745   if (yy != zz) PetscCall(VecCopy(yy, zz));
746   PetscCall(VecGetArrayRead(xx, &x));
747   PetscCall(VecGetArray(zz, &y));
748 
749   for (i = 0; i < m; i++) {
750     idx    = a->j + a->i[i];
751     v      = a->a + a->i[i];
752     n      = a->i[i + 1] - a->i[i];
753     alpha1 = x[5 * i];
754     alpha2 = x[5 * i + 1];
755     alpha3 = x[5 * i + 2];
756     alpha4 = x[5 * i + 3];
757     alpha5 = x[5 * i + 4];
758     while (n-- > 0) {
759       y[5 * (*idx)] += alpha1 * (*v);
760       y[5 * (*idx) + 1] += alpha2 * (*v);
761       y[5 * (*idx) + 2] += alpha3 * (*v);
762       y[5 * (*idx) + 3] += alpha4 * (*v);
763       y[5 * (*idx) + 4] += alpha5 * (*v);
764       idx++;
765       v++;
766     }
767   }
768   PetscCall(PetscLogFlops(10.0 * a->nz));
769   PetscCall(VecRestoreArrayRead(xx, &x));
770   PetscCall(VecRestoreArray(zz, &y));
771   PetscFunctionReturn(0);
772 }
773 
774 /* ------------------------------------------------------------------------------*/
775 PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
776   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
777   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
778   const PetscScalar *x, *v;
779   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
780   PetscInt           nonzerorow = 0, n, i, jrow, j;
781   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
782 
783   PetscFunctionBegin;
784   PetscCall(VecGetArrayRead(xx, &x));
785   PetscCall(VecGetArray(yy, &y));
786   idx = a->j;
787   v   = a->a;
788   ii  = a->i;
789 
790   for (i = 0; i < m; i++) {
791     jrow = ii[i];
792     n    = ii[i + 1] - jrow;
793     sum1 = 0.0;
794     sum2 = 0.0;
795     sum3 = 0.0;
796     sum4 = 0.0;
797     sum5 = 0.0;
798     sum6 = 0.0;
799 
800     nonzerorow += (n > 0);
801     for (j = 0; j < n; j++) {
802       sum1 += v[jrow] * x[6 * idx[jrow]];
803       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
804       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
805       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
806       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
807       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
808       jrow++;
809     }
810     y[6 * i]     = sum1;
811     y[6 * i + 1] = sum2;
812     y[6 * i + 2] = sum3;
813     y[6 * i + 3] = sum4;
814     y[6 * i + 4] = sum5;
815     y[6 * i + 5] = sum6;
816   }
817 
818   PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow));
819   PetscCall(VecRestoreArrayRead(xx, &x));
820   PetscCall(VecRestoreArray(yy, &y));
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
825   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
826   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
827   const PetscScalar *x, *v;
828   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
829   PetscInt           n, i;
830   const PetscInt     m = b->AIJ->rmap->n, *idx;
831 
832   PetscFunctionBegin;
833   PetscCall(VecSet(yy, 0.0));
834   PetscCall(VecGetArrayRead(xx, &x));
835   PetscCall(VecGetArray(yy, &y));
836 
837   for (i = 0; i < m; i++) {
838     idx    = a->j + a->i[i];
839     v      = a->a + a->i[i];
840     n      = a->i[i + 1] - a->i[i];
841     alpha1 = x[6 * i];
842     alpha2 = x[6 * i + 1];
843     alpha3 = x[6 * i + 2];
844     alpha4 = x[6 * i + 3];
845     alpha5 = x[6 * i + 4];
846     alpha6 = x[6 * i + 5];
847     while (n-- > 0) {
848       y[6 * (*idx)] += alpha1 * (*v);
849       y[6 * (*idx) + 1] += alpha2 * (*v);
850       y[6 * (*idx) + 2] += alpha3 * (*v);
851       y[6 * (*idx) + 3] += alpha4 * (*v);
852       y[6 * (*idx) + 4] += alpha5 * (*v);
853       y[6 * (*idx) + 5] += alpha6 * (*v);
854       idx++;
855       v++;
856     }
857   }
858   PetscCall(PetscLogFlops(12.0 * a->nz));
859   PetscCall(VecRestoreArrayRead(xx, &x));
860   PetscCall(VecRestoreArray(yy, &y));
861   PetscFunctionReturn(0);
862 }
863 
864 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
865   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
866   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
867   const PetscScalar *x, *v;
868   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
869   PetscInt           n, i, jrow, j;
870   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
871 
872   PetscFunctionBegin;
873   if (yy != zz) PetscCall(VecCopy(yy, zz));
874   PetscCall(VecGetArrayRead(xx, &x));
875   PetscCall(VecGetArray(zz, &y));
876   idx = a->j;
877   v   = a->a;
878   ii  = a->i;
879 
880   for (i = 0; i < m; i++) {
881     jrow = ii[i];
882     n    = ii[i + 1] - jrow;
883     sum1 = 0.0;
884     sum2 = 0.0;
885     sum3 = 0.0;
886     sum4 = 0.0;
887     sum5 = 0.0;
888     sum6 = 0.0;
889     for (j = 0; j < n; j++) {
890       sum1 += v[jrow] * x[6 * idx[jrow]];
891       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
892       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
893       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
894       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
895       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
896       jrow++;
897     }
898     y[6 * i] += sum1;
899     y[6 * i + 1] += sum2;
900     y[6 * i + 2] += sum3;
901     y[6 * i + 3] += sum4;
902     y[6 * i + 4] += sum5;
903     y[6 * i + 5] += sum6;
904   }
905 
906   PetscCall(PetscLogFlops(12.0 * a->nz));
907   PetscCall(VecRestoreArrayRead(xx, &x));
908   PetscCall(VecRestoreArray(zz, &y));
909   PetscFunctionReturn(0);
910 }
911 
912 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
913   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
914   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
915   const PetscScalar *x, *v;
916   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
917   PetscInt           n, i;
918   const PetscInt     m = b->AIJ->rmap->n, *idx;
919 
920   PetscFunctionBegin;
921   if (yy != zz) PetscCall(VecCopy(yy, zz));
922   PetscCall(VecGetArrayRead(xx, &x));
923   PetscCall(VecGetArray(zz, &y));
924 
925   for (i = 0; i < m; i++) {
926     idx    = a->j + a->i[i];
927     v      = a->a + a->i[i];
928     n      = a->i[i + 1] - a->i[i];
929     alpha1 = x[6 * i];
930     alpha2 = x[6 * i + 1];
931     alpha3 = x[6 * i + 2];
932     alpha4 = x[6 * i + 3];
933     alpha5 = x[6 * i + 4];
934     alpha6 = x[6 * i + 5];
935     while (n-- > 0) {
936       y[6 * (*idx)] += alpha1 * (*v);
937       y[6 * (*idx) + 1] += alpha2 * (*v);
938       y[6 * (*idx) + 2] += alpha3 * (*v);
939       y[6 * (*idx) + 3] += alpha4 * (*v);
940       y[6 * (*idx) + 4] += alpha5 * (*v);
941       y[6 * (*idx) + 5] += alpha6 * (*v);
942       idx++;
943       v++;
944     }
945   }
946   PetscCall(PetscLogFlops(12.0 * a->nz));
947   PetscCall(VecRestoreArrayRead(xx, &x));
948   PetscCall(VecRestoreArray(zz, &y));
949   PetscFunctionReturn(0);
950 }
951 
952 /* ------------------------------------------------------------------------------*/
953 PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
954   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
955   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
956   const PetscScalar *x, *v;
957   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
958   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
959   PetscInt           nonzerorow = 0, n, i, jrow, j;
960 
961   PetscFunctionBegin;
962   PetscCall(VecGetArrayRead(xx, &x));
963   PetscCall(VecGetArray(yy, &y));
964   idx = a->j;
965   v   = a->a;
966   ii  = a->i;
967 
968   for (i = 0; i < m; i++) {
969     jrow = ii[i];
970     n    = ii[i + 1] - jrow;
971     sum1 = 0.0;
972     sum2 = 0.0;
973     sum3 = 0.0;
974     sum4 = 0.0;
975     sum5 = 0.0;
976     sum6 = 0.0;
977     sum7 = 0.0;
978 
979     nonzerorow += (n > 0);
980     for (j = 0; j < n; j++) {
981       sum1 += v[jrow] * x[7 * idx[jrow]];
982       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
983       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
984       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
985       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
986       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
987       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
988       jrow++;
989     }
990     y[7 * i]     = sum1;
991     y[7 * i + 1] = sum2;
992     y[7 * i + 2] = sum3;
993     y[7 * i + 3] = sum4;
994     y[7 * i + 4] = sum5;
995     y[7 * i + 5] = sum6;
996     y[7 * i + 6] = sum7;
997   }
998 
999   PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow));
1000   PetscCall(VecRestoreArrayRead(xx, &x));
1001   PetscCall(VecRestoreArray(yy, &y));
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
1006   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1007   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1008   const PetscScalar *x, *v;
1009   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1010   const PetscInt     m = b->AIJ->rmap->n, *idx;
1011   PetscInt           n, i;
1012 
1013   PetscFunctionBegin;
1014   PetscCall(VecSet(yy, 0.0));
1015   PetscCall(VecGetArrayRead(xx, &x));
1016   PetscCall(VecGetArray(yy, &y));
1017 
1018   for (i = 0; i < m; i++) {
1019     idx    = a->j + a->i[i];
1020     v      = a->a + a->i[i];
1021     n      = a->i[i + 1] - a->i[i];
1022     alpha1 = x[7 * i];
1023     alpha2 = x[7 * i + 1];
1024     alpha3 = x[7 * i + 2];
1025     alpha4 = x[7 * i + 3];
1026     alpha5 = x[7 * i + 4];
1027     alpha6 = x[7 * i + 5];
1028     alpha7 = x[7 * i + 6];
1029     while (n-- > 0) {
1030       y[7 * (*idx)] += alpha1 * (*v);
1031       y[7 * (*idx) + 1] += alpha2 * (*v);
1032       y[7 * (*idx) + 2] += alpha3 * (*v);
1033       y[7 * (*idx) + 3] += alpha4 * (*v);
1034       y[7 * (*idx) + 4] += alpha5 * (*v);
1035       y[7 * (*idx) + 5] += alpha6 * (*v);
1036       y[7 * (*idx) + 6] += alpha7 * (*v);
1037       idx++;
1038       v++;
1039     }
1040   }
1041   PetscCall(PetscLogFlops(14.0 * a->nz));
1042   PetscCall(VecRestoreArrayRead(xx, &x));
1043   PetscCall(VecRestoreArray(yy, &y));
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1048   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1049   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1050   const PetscScalar *x, *v;
1051   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1052   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1053   PetscInt           n, i, jrow, j;
1054 
1055   PetscFunctionBegin;
1056   if (yy != zz) PetscCall(VecCopy(yy, zz));
1057   PetscCall(VecGetArrayRead(xx, &x));
1058   PetscCall(VecGetArray(zz, &y));
1059   idx = a->j;
1060   v   = a->a;
1061   ii  = a->i;
1062 
1063   for (i = 0; i < m; i++) {
1064     jrow = ii[i];
1065     n    = ii[i + 1] - jrow;
1066     sum1 = 0.0;
1067     sum2 = 0.0;
1068     sum3 = 0.0;
1069     sum4 = 0.0;
1070     sum5 = 0.0;
1071     sum6 = 0.0;
1072     sum7 = 0.0;
1073     for (j = 0; j < n; j++) {
1074       sum1 += v[jrow] * x[7 * idx[jrow]];
1075       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1076       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1077       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1078       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1079       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1080       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1081       jrow++;
1082     }
1083     y[7 * i] += sum1;
1084     y[7 * i + 1] += sum2;
1085     y[7 * i + 2] += sum3;
1086     y[7 * i + 3] += sum4;
1087     y[7 * i + 4] += sum5;
1088     y[7 * i + 5] += sum6;
1089     y[7 * i + 6] += sum7;
1090   }
1091 
1092   PetscCall(PetscLogFlops(14.0 * a->nz));
1093   PetscCall(VecRestoreArrayRead(xx, &x));
1094   PetscCall(VecRestoreArray(zz, &y));
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1099   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1100   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1101   const PetscScalar *x, *v;
1102   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1103   const PetscInt     m = b->AIJ->rmap->n, *idx;
1104   PetscInt           n, i;
1105 
1106   PetscFunctionBegin;
1107   if (yy != zz) PetscCall(VecCopy(yy, zz));
1108   PetscCall(VecGetArrayRead(xx, &x));
1109   PetscCall(VecGetArray(zz, &y));
1110   for (i = 0; i < m; i++) {
1111     idx    = a->j + a->i[i];
1112     v      = a->a + a->i[i];
1113     n      = a->i[i + 1] - a->i[i];
1114     alpha1 = x[7 * i];
1115     alpha2 = x[7 * i + 1];
1116     alpha3 = x[7 * i + 2];
1117     alpha4 = x[7 * i + 3];
1118     alpha5 = x[7 * i + 4];
1119     alpha6 = x[7 * i + 5];
1120     alpha7 = x[7 * i + 6];
1121     while (n-- > 0) {
1122       y[7 * (*idx)] += alpha1 * (*v);
1123       y[7 * (*idx) + 1] += alpha2 * (*v);
1124       y[7 * (*idx) + 2] += alpha3 * (*v);
1125       y[7 * (*idx) + 3] += alpha4 * (*v);
1126       y[7 * (*idx) + 4] += alpha5 * (*v);
1127       y[7 * (*idx) + 5] += alpha6 * (*v);
1128       y[7 * (*idx) + 6] += alpha7 * (*v);
1129       idx++;
1130       v++;
1131     }
1132   }
1133   PetscCall(PetscLogFlops(14.0 * a->nz));
1134   PetscCall(VecRestoreArrayRead(xx, &x));
1135   PetscCall(VecRestoreArray(zz, &y));
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
1140   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1141   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1142   const PetscScalar *x, *v;
1143   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1144   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1145   PetscInt           nonzerorow = 0, n, i, jrow, j;
1146 
1147   PetscFunctionBegin;
1148   PetscCall(VecGetArrayRead(xx, &x));
1149   PetscCall(VecGetArray(yy, &y));
1150   idx = a->j;
1151   v   = a->a;
1152   ii  = a->i;
1153 
1154   for (i = 0; i < m; i++) {
1155     jrow = ii[i];
1156     n    = ii[i + 1] - jrow;
1157     sum1 = 0.0;
1158     sum2 = 0.0;
1159     sum3 = 0.0;
1160     sum4 = 0.0;
1161     sum5 = 0.0;
1162     sum6 = 0.0;
1163     sum7 = 0.0;
1164     sum8 = 0.0;
1165 
1166     nonzerorow += (n > 0);
1167     for (j = 0; j < n; j++) {
1168       sum1 += v[jrow] * x[8 * idx[jrow]];
1169       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1170       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1171       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1172       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1173       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1174       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1175       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1176       jrow++;
1177     }
1178     y[8 * i]     = sum1;
1179     y[8 * i + 1] = sum2;
1180     y[8 * i + 2] = sum3;
1181     y[8 * i + 3] = sum4;
1182     y[8 * i + 4] = sum5;
1183     y[8 * i + 5] = sum6;
1184     y[8 * i + 6] = sum7;
1185     y[8 * i + 7] = sum8;
1186   }
1187 
1188   PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow));
1189   PetscCall(VecRestoreArrayRead(xx, &x));
1190   PetscCall(VecRestoreArray(yy, &y));
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
1195   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1196   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1197   const PetscScalar *x, *v;
1198   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1199   const PetscInt     m = b->AIJ->rmap->n, *idx;
1200   PetscInt           n, i;
1201 
1202   PetscFunctionBegin;
1203   PetscCall(VecSet(yy, 0.0));
1204   PetscCall(VecGetArrayRead(xx, &x));
1205   PetscCall(VecGetArray(yy, &y));
1206 
1207   for (i = 0; i < m; i++) {
1208     idx    = a->j + a->i[i];
1209     v      = a->a + a->i[i];
1210     n      = a->i[i + 1] - a->i[i];
1211     alpha1 = x[8 * i];
1212     alpha2 = x[8 * i + 1];
1213     alpha3 = x[8 * i + 2];
1214     alpha4 = x[8 * i + 3];
1215     alpha5 = x[8 * i + 4];
1216     alpha6 = x[8 * i + 5];
1217     alpha7 = x[8 * i + 6];
1218     alpha8 = x[8 * i + 7];
1219     while (n-- > 0) {
1220       y[8 * (*idx)] += alpha1 * (*v);
1221       y[8 * (*idx) + 1] += alpha2 * (*v);
1222       y[8 * (*idx) + 2] += alpha3 * (*v);
1223       y[8 * (*idx) + 3] += alpha4 * (*v);
1224       y[8 * (*idx) + 4] += alpha5 * (*v);
1225       y[8 * (*idx) + 5] += alpha6 * (*v);
1226       y[8 * (*idx) + 6] += alpha7 * (*v);
1227       y[8 * (*idx) + 7] += alpha8 * (*v);
1228       idx++;
1229       v++;
1230     }
1231   }
1232   PetscCall(PetscLogFlops(16.0 * a->nz));
1233   PetscCall(VecRestoreArrayRead(xx, &x));
1234   PetscCall(VecRestoreArray(yy, &y));
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
1239   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1240   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1241   const PetscScalar *x, *v;
1242   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1243   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1244   PetscInt           n, i, jrow, j;
1245 
1246   PetscFunctionBegin;
1247   if (yy != zz) PetscCall(VecCopy(yy, zz));
1248   PetscCall(VecGetArrayRead(xx, &x));
1249   PetscCall(VecGetArray(zz, &y));
1250   idx = a->j;
1251   v   = a->a;
1252   ii  = a->i;
1253 
1254   for (i = 0; i < m; i++) {
1255     jrow = ii[i];
1256     n    = ii[i + 1] - jrow;
1257     sum1 = 0.0;
1258     sum2 = 0.0;
1259     sum3 = 0.0;
1260     sum4 = 0.0;
1261     sum5 = 0.0;
1262     sum6 = 0.0;
1263     sum7 = 0.0;
1264     sum8 = 0.0;
1265     for (j = 0; j < n; j++) {
1266       sum1 += v[jrow] * x[8 * idx[jrow]];
1267       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1268       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1269       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1270       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1271       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1272       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1273       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1274       jrow++;
1275     }
1276     y[8 * i] += sum1;
1277     y[8 * i + 1] += sum2;
1278     y[8 * i + 2] += sum3;
1279     y[8 * i + 3] += sum4;
1280     y[8 * i + 4] += sum5;
1281     y[8 * i + 5] += sum6;
1282     y[8 * i + 6] += sum7;
1283     y[8 * i + 7] += sum8;
1284   }
1285 
1286   PetscCall(PetscLogFlops(16.0 * a->nz));
1287   PetscCall(VecRestoreArrayRead(xx, &x));
1288   PetscCall(VecRestoreArray(zz, &y));
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
1293   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1294   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1295   const PetscScalar *x, *v;
1296   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1297   const PetscInt     m = b->AIJ->rmap->n, *idx;
1298   PetscInt           n, i;
1299 
1300   PetscFunctionBegin;
1301   if (yy != zz) PetscCall(VecCopy(yy, zz));
1302   PetscCall(VecGetArrayRead(xx, &x));
1303   PetscCall(VecGetArray(zz, &y));
1304   for (i = 0; i < m; i++) {
1305     idx    = a->j + a->i[i];
1306     v      = a->a + a->i[i];
1307     n      = a->i[i + 1] - a->i[i];
1308     alpha1 = x[8 * i];
1309     alpha2 = x[8 * i + 1];
1310     alpha3 = x[8 * i + 2];
1311     alpha4 = x[8 * i + 3];
1312     alpha5 = x[8 * i + 4];
1313     alpha6 = x[8 * i + 5];
1314     alpha7 = x[8 * i + 6];
1315     alpha8 = x[8 * i + 7];
1316     while (n-- > 0) {
1317       y[8 * (*idx)] += alpha1 * (*v);
1318       y[8 * (*idx) + 1] += alpha2 * (*v);
1319       y[8 * (*idx) + 2] += alpha3 * (*v);
1320       y[8 * (*idx) + 3] += alpha4 * (*v);
1321       y[8 * (*idx) + 4] += alpha5 * (*v);
1322       y[8 * (*idx) + 5] += alpha6 * (*v);
1323       y[8 * (*idx) + 6] += alpha7 * (*v);
1324       y[8 * (*idx) + 7] += alpha8 * (*v);
1325       idx++;
1326       v++;
1327     }
1328   }
1329   PetscCall(PetscLogFlops(16.0 * a->nz));
1330   PetscCall(VecRestoreArrayRead(xx, &x));
1331   PetscCall(VecRestoreArray(zz, &y));
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 /* ------------------------------------------------------------------------------*/
1336 PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
1337   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1338   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1339   const PetscScalar *x, *v;
1340   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1341   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1342   PetscInt           nonzerorow = 0, n, i, jrow, j;
1343 
1344   PetscFunctionBegin;
1345   PetscCall(VecGetArrayRead(xx, &x));
1346   PetscCall(VecGetArray(yy, &y));
1347   idx = a->j;
1348   v   = a->a;
1349   ii  = a->i;
1350 
1351   for (i = 0; i < m; i++) {
1352     jrow = ii[i];
1353     n    = ii[i + 1] - jrow;
1354     sum1 = 0.0;
1355     sum2 = 0.0;
1356     sum3 = 0.0;
1357     sum4 = 0.0;
1358     sum5 = 0.0;
1359     sum6 = 0.0;
1360     sum7 = 0.0;
1361     sum8 = 0.0;
1362     sum9 = 0.0;
1363 
1364     nonzerorow += (n > 0);
1365     for (j = 0; j < n; j++) {
1366       sum1 += v[jrow] * x[9 * idx[jrow]];
1367       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1368       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1369       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1370       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1371       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1372       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1373       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1374       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1375       jrow++;
1376     }
1377     y[9 * i]     = sum1;
1378     y[9 * i + 1] = sum2;
1379     y[9 * i + 2] = sum3;
1380     y[9 * i + 3] = sum4;
1381     y[9 * i + 4] = sum5;
1382     y[9 * i + 5] = sum6;
1383     y[9 * i + 6] = sum7;
1384     y[9 * i + 7] = sum8;
1385     y[9 * i + 8] = sum9;
1386   }
1387 
1388   PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow));
1389   PetscCall(VecRestoreArrayRead(xx, &x));
1390   PetscCall(VecRestoreArray(yy, &y));
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 /* ------------------------------------------------------------------------------*/
1395 
1396 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
1397   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1398   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1399   const PetscScalar *x, *v;
1400   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1401   const PetscInt     m = b->AIJ->rmap->n, *idx;
1402   PetscInt           n, i;
1403 
1404   PetscFunctionBegin;
1405   PetscCall(VecSet(yy, 0.0));
1406   PetscCall(VecGetArrayRead(xx, &x));
1407   PetscCall(VecGetArray(yy, &y));
1408 
1409   for (i = 0; i < m; i++) {
1410     idx    = a->j + a->i[i];
1411     v      = a->a + a->i[i];
1412     n      = a->i[i + 1] - a->i[i];
1413     alpha1 = x[9 * i];
1414     alpha2 = x[9 * i + 1];
1415     alpha3 = x[9 * i + 2];
1416     alpha4 = x[9 * i + 3];
1417     alpha5 = x[9 * i + 4];
1418     alpha6 = x[9 * i + 5];
1419     alpha7 = x[9 * i + 6];
1420     alpha8 = x[9 * i + 7];
1421     alpha9 = x[9 * i + 8];
1422     while (n-- > 0) {
1423       y[9 * (*idx)] += alpha1 * (*v);
1424       y[9 * (*idx) + 1] += alpha2 * (*v);
1425       y[9 * (*idx) + 2] += alpha3 * (*v);
1426       y[9 * (*idx) + 3] += alpha4 * (*v);
1427       y[9 * (*idx) + 4] += alpha5 * (*v);
1428       y[9 * (*idx) + 5] += alpha6 * (*v);
1429       y[9 * (*idx) + 6] += alpha7 * (*v);
1430       y[9 * (*idx) + 7] += alpha8 * (*v);
1431       y[9 * (*idx) + 8] += alpha9 * (*v);
1432       idx++;
1433       v++;
1434     }
1435   }
1436   PetscCall(PetscLogFlops(18.0 * a->nz));
1437   PetscCall(VecRestoreArrayRead(xx, &x));
1438   PetscCall(VecRestoreArray(yy, &y));
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
1443   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1444   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1445   const PetscScalar *x, *v;
1446   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1447   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1448   PetscInt           n, i, jrow, j;
1449 
1450   PetscFunctionBegin;
1451   if (yy != zz) PetscCall(VecCopy(yy, zz));
1452   PetscCall(VecGetArrayRead(xx, &x));
1453   PetscCall(VecGetArray(zz, &y));
1454   idx = a->j;
1455   v   = a->a;
1456   ii  = a->i;
1457 
1458   for (i = 0; i < m; i++) {
1459     jrow = ii[i];
1460     n    = ii[i + 1] - jrow;
1461     sum1 = 0.0;
1462     sum2 = 0.0;
1463     sum3 = 0.0;
1464     sum4 = 0.0;
1465     sum5 = 0.0;
1466     sum6 = 0.0;
1467     sum7 = 0.0;
1468     sum8 = 0.0;
1469     sum9 = 0.0;
1470     for (j = 0; j < n; j++) {
1471       sum1 += v[jrow] * x[9 * idx[jrow]];
1472       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1473       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1474       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1475       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1476       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1477       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1478       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1479       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1480       jrow++;
1481     }
1482     y[9 * i] += sum1;
1483     y[9 * i + 1] += sum2;
1484     y[9 * i + 2] += sum3;
1485     y[9 * i + 3] += sum4;
1486     y[9 * i + 4] += sum5;
1487     y[9 * i + 5] += sum6;
1488     y[9 * i + 6] += sum7;
1489     y[9 * i + 7] += sum8;
1490     y[9 * i + 8] += sum9;
1491   }
1492 
1493   PetscCall(PetscLogFlops(18.0 * a->nz));
1494   PetscCall(VecRestoreArrayRead(xx, &x));
1495   PetscCall(VecRestoreArray(zz, &y));
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
1500   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1501   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1502   const PetscScalar *x, *v;
1503   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1504   const PetscInt     m = b->AIJ->rmap->n, *idx;
1505   PetscInt           n, i;
1506 
1507   PetscFunctionBegin;
1508   if (yy != zz) PetscCall(VecCopy(yy, zz));
1509   PetscCall(VecGetArrayRead(xx, &x));
1510   PetscCall(VecGetArray(zz, &y));
1511   for (i = 0; i < m; i++) {
1512     idx    = a->j + a->i[i];
1513     v      = a->a + a->i[i];
1514     n      = a->i[i + 1] - a->i[i];
1515     alpha1 = x[9 * i];
1516     alpha2 = x[9 * i + 1];
1517     alpha3 = x[9 * i + 2];
1518     alpha4 = x[9 * i + 3];
1519     alpha5 = x[9 * i + 4];
1520     alpha6 = x[9 * i + 5];
1521     alpha7 = x[9 * i + 6];
1522     alpha8 = x[9 * i + 7];
1523     alpha9 = x[9 * i + 8];
1524     while (n-- > 0) {
1525       y[9 * (*idx)] += alpha1 * (*v);
1526       y[9 * (*idx) + 1] += alpha2 * (*v);
1527       y[9 * (*idx) + 2] += alpha3 * (*v);
1528       y[9 * (*idx) + 3] += alpha4 * (*v);
1529       y[9 * (*idx) + 4] += alpha5 * (*v);
1530       y[9 * (*idx) + 5] += alpha6 * (*v);
1531       y[9 * (*idx) + 6] += alpha7 * (*v);
1532       y[9 * (*idx) + 7] += alpha8 * (*v);
1533       y[9 * (*idx) + 8] += alpha9 * (*v);
1534       idx++;
1535       v++;
1536     }
1537   }
1538   PetscCall(PetscLogFlops(18.0 * a->nz));
1539   PetscCall(VecRestoreArrayRead(xx, &x));
1540   PetscCall(VecRestoreArray(zz, &y));
1541   PetscFunctionReturn(0);
1542 }
1543 PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1544   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1545   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1546   const PetscScalar *x, *v;
1547   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1548   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1549   PetscInt           nonzerorow = 0, n, i, jrow, j;
1550 
1551   PetscFunctionBegin;
1552   PetscCall(VecGetArrayRead(xx, &x));
1553   PetscCall(VecGetArray(yy, &y));
1554   idx = a->j;
1555   v   = a->a;
1556   ii  = a->i;
1557 
1558   for (i = 0; i < m; i++) {
1559     jrow  = ii[i];
1560     n     = ii[i + 1] - jrow;
1561     sum1  = 0.0;
1562     sum2  = 0.0;
1563     sum3  = 0.0;
1564     sum4  = 0.0;
1565     sum5  = 0.0;
1566     sum6  = 0.0;
1567     sum7  = 0.0;
1568     sum8  = 0.0;
1569     sum9  = 0.0;
1570     sum10 = 0.0;
1571 
1572     nonzerorow += (n > 0);
1573     for (j = 0; j < n; j++) {
1574       sum1 += v[jrow] * x[10 * idx[jrow]];
1575       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1576       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1577       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1578       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1579       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1580       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1581       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1582       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1583       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1584       jrow++;
1585     }
1586     y[10 * i]     = sum1;
1587     y[10 * i + 1] = sum2;
1588     y[10 * i + 2] = sum3;
1589     y[10 * i + 3] = sum4;
1590     y[10 * i + 4] = sum5;
1591     y[10 * i + 5] = sum6;
1592     y[10 * i + 6] = sum7;
1593     y[10 * i + 7] = sum8;
1594     y[10 * i + 8] = sum9;
1595     y[10 * i + 9] = sum10;
1596   }
1597 
1598   PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow));
1599   PetscCall(VecRestoreArrayRead(xx, &x));
1600   PetscCall(VecRestoreArray(yy, &y));
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1605   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1606   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1607   const PetscScalar *x, *v;
1608   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1609   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1610   PetscInt           n, i, jrow, j;
1611 
1612   PetscFunctionBegin;
1613   if (yy != zz) PetscCall(VecCopy(yy, zz));
1614   PetscCall(VecGetArrayRead(xx, &x));
1615   PetscCall(VecGetArray(zz, &y));
1616   idx = a->j;
1617   v   = a->a;
1618   ii  = a->i;
1619 
1620   for (i = 0; i < m; i++) {
1621     jrow  = ii[i];
1622     n     = ii[i + 1] - jrow;
1623     sum1  = 0.0;
1624     sum2  = 0.0;
1625     sum3  = 0.0;
1626     sum4  = 0.0;
1627     sum5  = 0.0;
1628     sum6  = 0.0;
1629     sum7  = 0.0;
1630     sum8  = 0.0;
1631     sum9  = 0.0;
1632     sum10 = 0.0;
1633     for (j = 0; j < n; j++) {
1634       sum1 += v[jrow] * x[10 * idx[jrow]];
1635       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1636       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1637       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1638       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1639       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1640       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1641       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1642       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1643       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1644       jrow++;
1645     }
1646     y[10 * i] += sum1;
1647     y[10 * i + 1] += sum2;
1648     y[10 * i + 2] += sum3;
1649     y[10 * i + 3] += sum4;
1650     y[10 * i + 4] += sum5;
1651     y[10 * i + 5] += sum6;
1652     y[10 * i + 6] += sum7;
1653     y[10 * i + 7] += sum8;
1654     y[10 * i + 8] += sum9;
1655     y[10 * i + 9] += sum10;
1656   }
1657 
1658   PetscCall(PetscLogFlops(20.0 * a->nz));
1659   PetscCall(VecRestoreArrayRead(xx, &x));
1660   PetscCall(VecRestoreArray(yy, &y));
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1665   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1666   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1667   const PetscScalar *x, *v;
1668   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1669   const PetscInt     m = b->AIJ->rmap->n, *idx;
1670   PetscInt           n, i;
1671 
1672   PetscFunctionBegin;
1673   PetscCall(VecSet(yy, 0.0));
1674   PetscCall(VecGetArrayRead(xx, &x));
1675   PetscCall(VecGetArray(yy, &y));
1676 
1677   for (i = 0; i < m; i++) {
1678     idx     = a->j + a->i[i];
1679     v       = a->a + a->i[i];
1680     n       = a->i[i + 1] - a->i[i];
1681     alpha1  = x[10 * i];
1682     alpha2  = x[10 * i + 1];
1683     alpha3  = x[10 * i + 2];
1684     alpha4  = x[10 * i + 3];
1685     alpha5  = x[10 * i + 4];
1686     alpha6  = x[10 * i + 5];
1687     alpha7  = x[10 * i + 6];
1688     alpha8  = x[10 * i + 7];
1689     alpha9  = x[10 * i + 8];
1690     alpha10 = x[10 * i + 9];
1691     while (n-- > 0) {
1692       y[10 * (*idx)] += alpha1 * (*v);
1693       y[10 * (*idx) + 1] += alpha2 * (*v);
1694       y[10 * (*idx) + 2] += alpha3 * (*v);
1695       y[10 * (*idx) + 3] += alpha4 * (*v);
1696       y[10 * (*idx) + 4] += alpha5 * (*v);
1697       y[10 * (*idx) + 5] += alpha6 * (*v);
1698       y[10 * (*idx) + 6] += alpha7 * (*v);
1699       y[10 * (*idx) + 7] += alpha8 * (*v);
1700       y[10 * (*idx) + 8] += alpha9 * (*v);
1701       y[10 * (*idx) + 9] += alpha10 * (*v);
1702       idx++;
1703       v++;
1704     }
1705   }
1706   PetscCall(PetscLogFlops(20.0 * a->nz));
1707   PetscCall(VecRestoreArrayRead(xx, &x));
1708   PetscCall(VecRestoreArray(yy, &y));
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1713   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1714   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1715   const PetscScalar *x, *v;
1716   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1717   const PetscInt     m = b->AIJ->rmap->n, *idx;
1718   PetscInt           n, i;
1719 
1720   PetscFunctionBegin;
1721   if (yy != zz) PetscCall(VecCopy(yy, zz));
1722   PetscCall(VecGetArrayRead(xx, &x));
1723   PetscCall(VecGetArray(zz, &y));
1724   for (i = 0; i < m; i++) {
1725     idx     = a->j + a->i[i];
1726     v       = a->a + a->i[i];
1727     n       = a->i[i + 1] - a->i[i];
1728     alpha1  = x[10 * i];
1729     alpha2  = x[10 * i + 1];
1730     alpha3  = x[10 * i + 2];
1731     alpha4  = x[10 * i + 3];
1732     alpha5  = x[10 * i + 4];
1733     alpha6  = x[10 * i + 5];
1734     alpha7  = x[10 * i + 6];
1735     alpha8  = x[10 * i + 7];
1736     alpha9  = x[10 * i + 8];
1737     alpha10 = x[10 * i + 9];
1738     while (n-- > 0) {
1739       y[10 * (*idx)] += alpha1 * (*v);
1740       y[10 * (*idx) + 1] += alpha2 * (*v);
1741       y[10 * (*idx) + 2] += alpha3 * (*v);
1742       y[10 * (*idx) + 3] += alpha4 * (*v);
1743       y[10 * (*idx) + 4] += alpha5 * (*v);
1744       y[10 * (*idx) + 5] += alpha6 * (*v);
1745       y[10 * (*idx) + 6] += alpha7 * (*v);
1746       y[10 * (*idx) + 7] += alpha8 * (*v);
1747       y[10 * (*idx) + 8] += alpha9 * (*v);
1748       y[10 * (*idx) + 9] += alpha10 * (*v);
1749       idx++;
1750       v++;
1751     }
1752   }
1753   PetscCall(PetscLogFlops(20.0 * a->nz));
1754   PetscCall(VecRestoreArrayRead(xx, &x));
1755   PetscCall(VecRestoreArray(zz, &y));
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 /*--------------------------------------------------------------------------------------------*/
1760 PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1761   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1762   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1763   const PetscScalar *x, *v;
1764   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1765   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1766   PetscInt           nonzerorow = 0, n, i, jrow, j;
1767 
1768   PetscFunctionBegin;
1769   PetscCall(VecGetArrayRead(xx, &x));
1770   PetscCall(VecGetArray(yy, &y));
1771   idx = a->j;
1772   v   = a->a;
1773   ii  = a->i;
1774 
1775   for (i = 0; i < m; i++) {
1776     jrow  = ii[i];
1777     n     = ii[i + 1] - jrow;
1778     sum1  = 0.0;
1779     sum2  = 0.0;
1780     sum3  = 0.0;
1781     sum4  = 0.0;
1782     sum5  = 0.0;
1783     sum6  = 0.0;
1784     sum7  = 0.0;
1785     sum8  = 0.0;
1786     sum9  = 0.0;
1787     sum10 = 0.0;
1788     sum11 = 0.0;
1789 
1790     nonzerorow += (n > 0);
1791     for (j = 0; j < n; j++) {
1792       sum1 += v[jrow] * x[11 * idx[jrow]];
1793       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1794       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1795       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1796       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1797       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1798       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1799       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1800       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1801       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1802       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1803       jrow++;
1804     }
1805     y[11 * i]      = sum1;
1806     y[11 * i + 1]  = sum2;
1807     y[11 * i + 2]  = sum3;
1808     y[11 * i + 3]  = sum4;
1809     y[11 * i + 4]  = sum5;
1810     y[11 * i + 5]  = sum6;
1811     y[11 * i + 6]  = sum7;
1812     y[11 * i + 7]  = sum8;
1813     y[11 * i + 8]  = sum9;
1814     y[11 * i + 9]  = sum10;
1815     y[11 * i + 10] = sum11;
1816   }
1817 
1818   PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow));
1819   PetscCall(VecRestoreArrayRead(xx, &x));
1820   PetscCall(VecRestoreArray(yy, &y));
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1825   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1826   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1827   const PetscScalar *x, *v;
1828   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1829   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1830   PetscInt           n, i, jrow, j;
1831 
1832   PetscFunctionBegin;
1833   if (yy != zz) PetscCall(VecCopy(yy, zz));
1834   PetscCall(VecGetArrayRead(xx, &x));
1835   PetscCall(VecGetArray(zz, &y));
1836   idx = a->j;
1837   v   = a->a;
1838   ii  = a->i;
1839 
1840   for (i = 0; i < m; i++) {
1841     jrow  = ii[i];
1842     n     = ii[i + 1] - jrow;
1843     sum1  = 0.0;
1844     sum2  = 0.0;
1845     sum3  = 0.0;
1846     sum4  = 0.0;
1847     sum5  = 0.0;
1848     sum6  = 0.0;
1849     sum7  = 0.0;
1850     sum8  = 0.0;
1851     sum9  = 0.0;
1852     sum10 = 0.0;
1853     sum11 = 0.0;
1854     for (j = 0; j < n; j++) {
1855       sum1 += v[jrow] * x[11 * idx[jrow]];
1856       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1857       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1858       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1859       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1860       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1861       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1862       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1863       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1864       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1865       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1866       jrow++;
1867     }
1868     y[11 * i] += sum1;
1869     y[11 * i + 1] += sum2;
1870     y[11 * i + 2] += sum3;
1871     y[11 * i + 3] += sum4;
1872     y[11 * i + 4] += sum5;
1873     y[11 * i + 5] += sum6;
1874     y[11 * i + 6] += sum7;
1875     y[11 * i + 7] += sum8;
1876     y[11 * i + 8] += sum9;
1877     y[11 * i + 9] += sum10;
1878     y[11 * i + 10] += sum11;
1879   }
1880 
1881   PetscCall(PetscLogFlops(22.0 * a->nz));
1882   PetscCall(VecRestoreArrayRead(xx, &x));
1883   PetscCall(VecRestoreArray(yy, &y));
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1888   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1889   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1890   const PetscScalar *x, *v;
1891   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1892   const PetscInt     m = b->AIJ->rmap->n, *idx;
1893   PetscInt           n, i;
1894 
1895   PetscFunctionBegin;
1896   PetscCall(VecSet(yy, 0.0));
1897   PetscCall(VecGetArrayRead(xx, &x));
1898   PetscCall(VecGetArray(yy, &y));
1899 
1900   for (i = 0; i < m; i++) {
1901     idx     = a->j + a->i[i];
1902     v       = a->a + a->i[i];
1903     n       = a->i[i + 1] - a->i[i];
1904     alpha1  = x[11 * i];
1905     alpha2  = x[11 * i + 1];
1906     alpha3  = x[11 * i + 2];
1907     alpha4  = x[11 * i + 3];
1908     alpha5  = x[11 * i + 4];
1909     alpha6  = x[11 * i + 5];
1910     alpha7  = x[11 * i + 6];
1911     alpha8  = x[11 * i + 7];
1912     alpha9  = x[11 * i + 8];
1913     alpha10 = x[11 * i + 9];
1914     alpha11 = x[11 * i + 10];
1915     while (n-- > 0) {
1916       y[11 * (*idx)] += alpha1 * (*v);
1917       y[11 * (*idx) + 1] += alpha2 * (*v);
1918       y[11 * (*idx) + 2] += alpha3 * (*v);
1919       y[11 * (*idx) + 3] += alpha4 * (*v);
1920       y[11 * (*idx) + 4] += alpha5 * (*v);
1921       y[11 * (*idx) + 5] += alpha6 * (*v);
1922       y[11 * (*idx) + 6] += alpha7 * (*v);
1923       y[11 * (*idx) + 7] += alpha8 * (*v);
1924       y[11 * (*idx) + 8] += alpha9 * (*v);
1925       y[11 * (*idx) + 9] += alpha10 * (*v);
1926       y[11 * (*idx) + 10] += alpha11 * (*v);
1927       idx++;
1928       v++;
1929     }
1930   }
1931   PetscCall(PetscLogFlops(22.0 * a->nz));
1932   PetscCall(VecRestoreArrayRead(xx, &x));
1933   PetscCall(VecRestoreArray(yy, &y));
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1938   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1939   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1940   const PetscScalar *x, *v;
1941   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1942   const PetscInt     m = b->AIJ->rmap->n, *idx;
1943   PetscInt           n, i;
1944 
1945   PetscFunctionBegin;
1946   if (yy != zz) PetscCall(VecCopy(yy, zz));
1947   PetscCall(VecGetArrayRead(xx, &x));
1948   PetscCall(VecGetArray(zz, &y));
1949   for (i = 0; i < m; i++) {
1950     idx     = a->j + a->i[i];
1951     v       = a->a + a->i[i];
1952     n       = a->i[i + 1] - a->i[i];
1953     alpha1  = x[11 * i];
1954     alpha2  = x[11 * i + 1];
1955     alpha3  = x[11 * i + 2];
1956     alpha4  = x[11 * i + 3];
1957     alpha5  = x[11 * i + 4];
1958     alpha6  = x[11 * i + 5];
1959     alpha7  = x[11 * i + 6];
1960     alpha8  = x[11 * i + 7];
1961     alpha9  = x[11 * i + 8];
1962     alpha10 = x[11 * i + 9];
1963     alpha11 = x[11 * i + 10];
1964     while (n-- > 0) {
1965       y[11 * (*idx)] += alpha1 * (*v);
1966       y[11 * (*idx) + 1] += alpha2 * (*v);
1967       y[11 * (*idx) + 2] += alpha3 * (*v);
1968       y[11 * (*idx) + 3] += alpha4 * (*v);
1969       y[11 * (*idx) + 4] += alpha5 * (*v);
1970       y[11 * (*idx) + 5] += alpha6 * (*v);
1971       y[11 * (*idx) + 6] += alpha7 * (*v);
1972       y[11 * (*idx) + 7] += alpha8 * (*v);
1973       y[11 * (*idx) + 8] += alpha9 * (*v);
1974       y[11 * (*idx) + 9] += alpha10 * (*v);
1975       y[11 * (*idx) + 10] += alpha11 * (*v);
1976       idx++;
1977       v++;
1978     }
1979   }
1980   PetscCall(PetscLogFlops(22.0 * a->nz));
1981   PetscCall(VecRestoreArrayRead(xx, &x));
1982   PetscCall(VecRestoreArray(zz, &y));
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 /*--------------------------------------------------------------------------------------------*/
1987 PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
1988   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1989   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1990   const PetscScalar *x, *v;
1991   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1992   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1993   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1994   PetscInt           nonzerorow = 0, n, i, jrow, j;
1995 
1996   PetscFunctionBegin;
1997   PetscCall(VecGetArrayRead(xx, &x));
1998   PetscCall(VecGetArray(yy, &y));
1999   idx = a->j;
2000   v   = a->a;
2001   ii  = a->i;
2002 
2003   for (i = 0; i < m; i++) {
2004     jrow  = ii[i];
2005     n     = ii[i + 1] - jrow;
2006     sum1  = 0.0;
2007     sum2  = 0.0;
2008     sum3  = 0.0;
2009     sum4  = 0.0;
2010     sum5  = 0.0;
2011     sum6  = 0.0;
2012     sum7  = 0.0;
2013     sum8  = 0.0;
2014     sum9  = 0.0;
2015     sum10 = 0.0;
2016     sum11 = 0.0;
2017     sum12 = 0.0;
2018     sum13 = 0.0;
2019     sum14 = 0.0;
2020     sum15 = 0.0;
2021     sum16 = 0.0;
2022 
2023     nonzerorow += (n > 0);
2024     for (j = 0; j < n; j++) {
2025       sum1 += v[jrow] * x[16 * idx[jrow]];
2026       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2027       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2028       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2029       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2030       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2031       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2032       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2033       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2034       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2035       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2036       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2037       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2038       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2039       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2040       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2041       jrow++;
2042     }
2043     y[16 * i]      = sum1;
2044     y[16 * i + 1]  = sum2;
2045     y[16 * i + 2]  = sum3;
2046     y[16 * i + 3]  = sum4;
2047     y[16 * i + 4]  = sum5;
2048     y[16 * i + 5]  = sum6;
2049     y[16 * i + 6]  = sum7;
2050     y[16 * i + 7]  = sum8;
2051     y[16 * i + 8]  = sum9;
2052     y[16 * i + 9]  = sum10;
2053     y[16 * i + 10] = sum11;
2054     y[16 * i + 11] = sum12;
2055     y[16 * i + 12] = sum13;
2056     y[16 * i + 13] = sum14;
2057     y[16 * i + 14] = sum15;
2058     y[16 * i + 15] = sum16;
2059   }
2060 
2061   PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow));
2062   PetscCall(VecRestoreArrayRead(xx, &x));
2063   PetscCall(VecRestoreArray(yy, &y));
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
2068   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2069   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2070   const PetscScalar *x, *v;
2071   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2072   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2073   const PetscInt     m = b->AIJ->rmap->n, *idx;
2074   PetscInt           n, i;
2075 
2076   PetscFunctionBegin;
2077   PetscCall(VecSet(yy, 0.0));
2078   PetscCall(VecGetArrayRead(xx, &x));
2079   PetscCall(VecGetArray(yy, &y));
2080 
2081   for (i = 0; i < m; i++) {
2082     idx     = a->j + a->i[i];
2083     v       = a->a + a->i[i];
2084     n       = a->i[i + 1] - a->i[i];
2085     alpha1  = x[16 * i];
2086     alpha2  = x[16 * i + 1];
2087     alpha3  = x[16 * i + 2];
2088     alpha4  = x[16 * i + 3];
2089     alpha5  = x[16 * i + 4];
2090     alpha6  = x[16 * i + 5];
2091     alpha7  = x[16 * i + 6];
2092     alpha8  = x[16 * i + 7];
2093     alpha9  = x[16 * i + 8];
2094     alpha10 = x[16 * i + 9];
2095     alpha11 = x[16 * i + 10];
2096     alpha12 = x[16 * i + 11];
2097     alpha13 = x[16 * i + 12];
2098     alpha14 = x[16 * i + 13];
2099     alpha15 = x[16 * i + 14];
2100     alpha16 = x[16 * i + 15];
2101     while (n-- > 0) {
2102       y[16 * (*idx)] += alpha1 * (*v);
2103       y[16 * (*idx) + 1] += alpha2 * (*v);
2104       y[16 * (*idx) + 2] += alpha3 * (*v);
2105       y[16 * (*idx) + 3] += alpha4 * (*v);
2106       y[16 * (*idx) + 4] += alpha5 * (*v);
2107       y[16 * (*idx) + 5] += alpha6 * (*v);
2108       y[16 * (*idx) + 6] += alpha7 * (*v);
2109       y[16 * (*idx) + 7] += alpha8 * (*v);
2110       y[16 * (*idx) + 8] += alpha9 * (*v);
2111       y[16 * (*idx) + 9] += alpha10 * (*v);
2112       y[16 * (*idx) + 10] += alpha11 * (*v);
2113       y[16 * (*idx) + 11] += alpha12 * (*v);
2114       y[16 * (*idx) + 12] += alpha13 * (*v);
2115       y[16 * (*idx) + 13] += alpha14 * (*v);
2116       y[16 * (*idx) + 14] += alpha15 * (*v);
2117       y[16 * (*idx) + 15] += alpha16 * (*v);
2118       idx++;
2119       v++;
2120     }
2121   }
2122   PetscCall(PetscLogFlops(32.0 * a->nz));
2123   PetscCall(VecRestoreArrayRead(xx, &x));
2124   PetscCall(VecRestoreArray(yy, &y));
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
2129   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2130   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2131   const PetscScalar *x, *v;
2132   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2133   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2134   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2135   PetscInt           n, i, jrow, j;
2136 
2137   PetscFunctionBegin;
2138   if (yy != zz) PetscCall(VecCopy(yy, zz));
2139   PetscCall(VecGetArrayRead(xx, &x));
2140   PetscCall(VecGetArray(zz, &y));
2141   idx = a->j;
2142   v   = a->a;
2143   ii  = a->i;
2144 
2145   for (i = 0; i < m; i++) {
2146     jrow  = ii[i];
2147     n     = ii[i + 1] - jrow;
2148     sum1  = 0.0;
2149     sum2  = 0.0;
2150     sum3  = 0.0;
2151     sum4  = 0.0;
2152     sum5  = 0.0;
2153     sum6  = 0.0;
2154     sum7  = 0.0;
2155     sum8  = 0.0;
2156     sum9  = 0.0;
2157     sum10 = 0.0;
2158     sum11 = 0.0;
2159     sum12 = 0.0;
2160     sum13 = 0.0;
2161     sum14 = 0.0;
2162     sum15 = 0.0;
2163     sum16 = 0.0;
2164     for (j = 0; j < n; j++) {
2165       sum1 += v[jrow] * x[16 * idx[jrow]];
2166       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2167       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2168       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2169       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2170       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2171       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2172       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2173       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2174       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2175       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2176       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2177       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2178       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2179       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2180       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2181       jrow++;
2182     }
2183     y[16 * i] += sum1;
2184     y[16 * i + 1] += sum2;
2185     y[16 * i + 2] += sum3;
2186     y[16 * i + 3] += sum4;
2187     y[16 * i + 4] += sum5;
2188     y[16 * i + 5] += sum6;
2189     y[16 * i + 6] += sum7;
2190     y[16 * i + 7] += sum8;
2191     y[16 * i + 8] += sum9;
2192     y[16 * i + 9] += sum10;
2193     y[16 * i + 10] += sum11;
2194     y[16 * i + 11] += sum12;
2195     y[16 * i + 12] += sum13;
2196     y[16 * i + 13] += sum14;
2197     y[16 * i + 14] += sum15;
2198     y[16 * i + 15] += sum16;
2199   }
2200 
2201   PetscCall(PetscLogFlops(32.0 * a->nz));
2202   PetscCall(VecRestoreArrayRead(xx, &x));
2203   PetscCall(VecRestoreArray(zz, &y));
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
2208   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2209   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2210   const PetscScalar *x, *v;
2211   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2212   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2213   const PetscInt     m = b->AIJ->rmap->n, *idx;
2214   PetscInt           n, i;
2215 
2216   PetscFunctionBegin;
2217   if (yy != zz) PetscCall(VecCopy(yy, zz));
2218   PetscCall(VecGetArrayRead(xx, &x));
2219   PetscCall(VecGetArray(zz, &y));
2220   for (i = 0; i < m; i++) {
2221     idx     = a->j + a->i[i];
2222     v       = a->a + a->i[i];
2223     n       = a->i[i + 1] - a->i[i];
2224     alpha1  = x[16 * i];
2225     alpha2  = x[16 * i + 1];
2226     alpha3  = x[16 * i + 2];
2227     alpha4  = x[16 * i + 3];
2228     alpha5  = x[16 * i + 4];
2229     alpha6  = x[16 * i + 5];
2230     alpha7  = x[16 * i + 6];
2231     alpha8  = x[16 * i + 7];
2232     alpha9  = x[16 * i + 8];
2233     alpha10 = x[16 * i + 9];
2234     alpha11 = x[16 * i + 10];
2235     alpha12 = x[16 * i + 11];
2236     alpha13 = x[16 * i + 12];
2237     alpha14 = x[16 * i + 13];
2238     alpha15 = x[16 * i + 14];
2239     alpha16 = x[16 * i + 15];
2240     while (n-- > 0) {
2241       y[16 * (*idx)] += alpha1 * (*v);
2242       y[16 * (*idx) + 1] += alpha2 * (*v);
2243       y[16 * (*idx) + 2] += alpha3 * (*v);
2244       y[16 * (*idx) + 3] += alpha4 * (*v);
2245       y[16 * (*idx) + 4] += alpha5 * (*v);
2246       y[16 * (*idx) + 5] += alpha6 * (*v);
2247       y[16 * (*idx) + 6] += alpha7 * (*v);
2248       y[16 * (*idx) + 7] += alpha8 * (*v);
2249       y[16 * (*idx) + 8] += alpha9 * (*v);
2250       y[16 * (*idx) + 9] += alpha10 * (*v);
2251       y[16 * (*idx) + 10] += alpha11 * (*v);
2252       y[16 * (*idx) + 11] += alpha12 * (*v);
2253       y[16 * (*idx) + 12] += alpha13 * (*v);
2254       y[16 * (*idx) + 13] += alpha14 * (*v);
2255       y[16 * (*idx) + 14] += alpha15 * (*v);
2256       y[16 * (*idx) + 15] += alpha16 * (*v);
2257       idx++;
2258       v++;
2259     }
2260   }
2261   PetscCall(PetscLogFlops(32.0 * a->nz));
2262   PetscCall(VecRestoreArrayRead(xx, &x));
2263   PetscCall(VecRestoreArray(zz, &y));
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 /*--------------------------------------------------------------------------------------------*/
2268 PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2269   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2270   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2271   const PetscScalar *x, *v;
2272   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2273   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2274   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2275   PetscInt           nonzerorow = 0, n, i, jrow, j;
2276 
2277   PetscFunctionBegin;
2278   PetscCall(VecGetArrayRead(xx, &x));
2279   PetscCall(VecGetArray(yy, &y));
2280   idx = a->j;
2281   v   = a->a;
2282   ii  = a->i;
2283 
2284   for (i = 0; i < m; i++) {
2285     jrow  = ii[i];
2286     n     = ii[i + 1] - jrow;
2287     sum1  = 0.0;
2288     sum2  = 0.0;
2289     sum3  = 0.0;
2290     sum4  = 0.0;
2291     sum5  = 0.0;
2292     sum6  = 0.0;
2293     sum7  = 0.0;
2294     sum8  = 0.0;
2295     sum9  = 0.0;
2296     sum10 = 0.0;
2297     sum11 = 0.0;
2298     sum12 = 0.0;
2299     sum13 = 0.0;
2300     sum14 = 0.0;
2301     sum15 = 0.0;
2302     sum16 = 0.0;
2303     sum17 = 0.0;
2304     sum18 = 0.0;
2305 
2306     nonzerorow += (n > 0);
2307     for (j = 0; j < n; j++) {
2308       sum1 += v[jrow] * x[18 * idx[jrow]];
2309       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2310       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2311       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2312       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2313       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2314       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2315       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2316       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2317       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2318       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2319       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2320       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2321       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2322       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2323       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2324       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2325       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2326       jrow++;
2327     }
2328     y[18 * i]      = sum1;
2329     y[18 * i + 1]  = sum2;
2330     y[18 * i + 2]  = sum3;
2331     y[18 * i + 3]  = sum4;
2332     y[18 * i + 4]  = sum5;
2333     y[18 * i + 5]  = sum6;
2334     y[18 * i + 6]  = sum7;
2335     y[18 * i + 7]  = sum8;
2336     y[18 * i + 8]  = sum9;
2337     y[18 * i + 9]  = sum10;
2338     y[18 * i + 10] = sum11;
2339     y[18 * i + 11] = sum12;
2340     y[18 * i + 12] = sum13;
2341     y[18 * i + 13] = sum14;
2342     y[18 * i + 14] = sum15;
2343     y[18 * i + 15] = sum16;
2344     y[18 * i + 16] = sum17;
2345     y[18 * i + 17] = sum18;
2346   }
2347 
2348   PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow));
2349   PetscCall(VecRestoreArrayRead(xx, &x));
2350   PetscCall(VecRestoreArray(yy, &y));
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2355   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2356   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2357   const PetscScalar *x, *v;
2358   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2359   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2360   const PetscInt     m = b->AIJ->rmap->n, *idx;
2361   PetscInt           n, i;
2362 
2363   PetscFunctionBegin;
2364   PetscCall(VecSet(yy, 0.0));
2365   PetscCall(VecGetArrayRead(xx, &x));
2366   PetscCall(VecGetArray(yy, &y));
2367 
2368   for (i = 0; i < m; i++) {
2369     idx     = a->j + a->i[i];
2370     v       = a->a + a->i[i];
2371     n       = a->i[i + 1] - a->i[i];
2372     alpha1  = x[18 * i];
2373     alpha2  = x[18 * i + 1];
2374     alpha3  = x[18 * i + 2];
2375     alpha4  = x[18 * i + 3];
2376     alpha5  = x[18 * i + 4];
2377     alpha6  = x[18 * i + 5];
2378     alpha7  = x[18 * i + 6];
2379     alpha8  = x[18 * i + 7];
2380     alpha9  = x[18 * i + 8];
2381     alpha10 = x[18 * i + 9];
2382     alpha11 = x[18 * i + 10];
2383     alpha12 = x[18 * i + 11];
2384     alpha13 = x[18 * i + 12];
2385     alpha14 = x[18 * i + 13];
2386     alpha15 = x[18 * i + 14];
2387     alpha16 = x[18 * i + 15];
2388     alpha17 = x[18 * i + 16];
2389     alpha18 = x[18 * i + 17];
2390     while (n-- > 0) {
2391       y[18 * (*idx)] += alpha1 * (*v);
2392       y[18 * (*idx) + 1] += alpha2 * (*v);
2393       y[18 * (*idx) + 2] += alpha3 * (*v);
2394       y[18 * (*idx) + 3] += alpha4 * (*v);
2395       y[18 * (*idx) + 4] += alpha5 * (*v);
2396       y[18 * (*idx) + 5] += alpha6 * (*v);
2397       y[18 * (*idx) + 6] += alpha7 * (*v);
2398       y[18 * (*idx) + 7] += alpha8 * (*v);
2399       y[18 * (*idx) + 8] += alpha9 * (*v);
2400       y[18 * (*idx) + 9] += alpha10 * (*v);
2401       y[18 * (*idx) + 10] += alpha11 * (*v);
2402       y[18 * (*idx) + 11] += alpha12 * (*v);
2403       y[18 * (*idx) + 12] += alpha13 * (*v);
2404       y[18 * (*idx) + 13] += alpha14 * (*v);
2405       y[18 * (*idx) + 14] += alpha15 * (*v);
2406       y[18 * (*idx) + 15] += alpha16 * (*v);
2407       y[18 * (*idx) + 16] += alpha17 * (*v);
2408       y[18 * (*idx) + 17] += alpha18 * (*v);
2409       idx++;
2410       v++;
2411     }
2412   }
2413   PetscCall(PetscLogFlops(36.0 * a->nz));
2414   PetscCall(VecRestoreArrayRead(xx, &x));
2415   PetscCall(VecRestoreArray(yy, &y));
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2420   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2421   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2422   const PetscScalar *x, *v;
2423   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2424   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2425   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2426   PetscInt           n, i, jrow, j;
2427 
2428   PetscFunctionBegin;
2429   if (yy != zz) PetscCall(VecCopy(yy, zz));
2430   PetscCall(VecGetArrayRead(xx, &x));
2431   PetscCall(VecGetArray(zz, &y));
2432   idx = a->j;
2433   v   = a->a;
2434   ii  = a->i;
2435 
2436   for (i = 0; i < m; i++) {
2437     jrow  = ii[i];
2438     n     = ii[i + 1] - jrow;
2439     sum1  = 0.0;
2440     sum2  = 0.0;
2441     sum3  = 0.0;
2442     sum4  = 0.0;
2443     sum5  = 0.0;
2444     sum6  = 0.0;
2445     sum7  = 0.0;
2446     sum8  = 0.0;
2447     sum9  = 0.0;
2448     sum10 = 0.0;
2449     sum11 = 0.0;
2450     sum12 = 0.0;
2451     sum13 = 0.0;
2452     sum14 = 0.0;
2453     sum15 = 0.0;
2454     sum16 = 0.0;
2455     sum17 = 0.0;
2456     sum18 = 0.0;
2457     for (j = 0; j < n; j++) {
2458       sum1 += v[jrow] * x[18 * idx[jrow]];
2459       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2460       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2461       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2462       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2463       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2464       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2465       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2466       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2467       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2468       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2469       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2470       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2471       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2472       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2473       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2474       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2475       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2476       jrow++;
2477     }
2478     y[18 * i] += sum1;
2479     y[18 * i + 1] += sum2;
2480     y[18 * i + 2] += sum3;
2481     y[18 * i + 3] += sum4;
2482     y[18 * i + 4] += sum5;
2483     y[18 * i + 5] += sum6;
2484     y[18 * i + 6] += sum7;
2485     y[18 * i + 7] += sum8;
2486     y[18 * i + 8] += sum9;
2487     y[18 * i + 9] += sum10;
2488     y[18 * i + 10] += sum11;
2489     y[18 * i + 11] += sum12;
2490     y[18 * i + 12] += sum13;
2491     y[18 * i + 13] += sum14;
2492     y[18 * i + 14] += sum15;
2493     y[18 * i + 15] += sum16;
2494     y[18 * i + 16] += sum17;
2495     y[18 * i + 17] += sum18;
2496   }
2497 
2498   PetscCall(PetscLogFlops(36.0 * a->nz));
2499   PetscCall(VecRestoreArrayRead(xx, &x));
2500   PetscCall(VecRestoreArray(zz, &y));
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2505   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2506   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2507   const PetscScalar *x, *v;
2508   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2509   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2510   const PetscInt     m = b->AIJ->rmap->n, *idx;
2511   PetscInt           n, i;
2512 
2513   PetscFunctionBegin;
2514   if (yy != zz) PetscCall(VecCopy(yy, zz));
2515   PetscCall(VecGetArrayRead(xx, &x));
2516   PetscCall(VecGetArray(zz, &y));
2517   for (i = 0; i < m; i++) {
2518     idx     = a->j + a->i[i];
2519     v       = a->a + a->i[i];
2520     n       = a->i[i + 1] - a->i[i];
2521     alpha1  = x[18 * i];
2522     alpha2  = x[18 * i + 1];
2523     alpha3  = x[18 * i + 2];
2524     alpha4  = x[18 * i + 3];
2525     alpha5  = x[18 * i + 4];
2526     alpha6  = x[18 * i + 5];
2527     alpha7  = x[18 * i + 6];
2528     alpha8  = x[18 * i + 7];
2529     alpha9  = x[18 * i + 8];
2530     alpha10 = x[18 * i + 9];
2531     alpha11 = x[18 * i + 10];
2532     alpha12 = x[18 * i + 11];
2533     alpha13 = x[18 * i + 12];
2534     alpha14 = x[18 * i + 13];
2535     alpha15 = x[18 * i + 14];
2536     alpha16 = x[18 * i + 15];
2537     alpha17 = x[18 * i + 16];
2538     alpha18 = x[18 * i + 17];
2539     while (n-- > 0) {
2540       y[18 * (*idx)] += alpha1 * (*v);
2541       y[18 * (*idx) + 1] += alpha2 * (*v);
2542       y[18 * (*idx) + 2] += alpha3 * (*v);
2543       y[18 * (*idx) + 3] += alpha4 * (*v);
2544       y[18 * (*idx) + 4] += alpha5 * (*v);
2545       y[18 * (*idx) + 5] += alpha6 * (*v);
2546       y[18 * (*idx) + 6] += alpha7 * (*v);
2547       y[18 * (*idx) + 7] += alpha8 * (*v);
2548       y[18 * (*idx) + 8] += alpha9 * (*v);
2549       y[18 * (*idx) + 9] += alpha10 * (*v);
2550       y[18 * (*idx) + 10] += alpha11 * (*v);
2551       y[18 * (*idx) + 11] += alpha12 * (*v);
2552       y[18 * (*idx) + 12] += alpha13 * (*v);
2553       y[18 * (*idx) + 13] += alpha14 * (*v);
2554       y[18 * (*idx) + 14] += alpha15 * (*v);
2555       y[18 * (*idx) + 15] += alpha16 * (*v);
2556       y[18 * (*idx) + 16] += alpha17 * (*v);
2557       y[18 * (*idx) + 17] += alpha18 * (*v);
2558       idx++;
2559       v++;
2560     }
2561   }
2562   PetscCall(PetscLogFlops(36.0 * a->nz));
2563   PetscCall(VecRestoreArrayRead(xx, &x));
2564   PetscCall(VecRestoreArray(zz, &y));
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
2569   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2570   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2571   const PetscScalar *x, *v;
2572   PetscScalar       *y, *sums;
2573   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2574   PetscInt           n, i, jrow, j, dof = b->dof, k;
2575 
2576   PetscFunctionBegin;
2577   PetscCall(VecGetArrayRead(xx, &x));
2578   PetscCall(VecSet(yy, 0.0));
2579   PetscCall(VecGetArray(yy, &y));
2580   idx = a->j;
2581   v   = a->a;
2582   ii  = a->i;
2583 
2584   for (i = 0; i < m; i++) {
2585     jrow = ii[i];
2586     n    = ii[i + 1] - jrow;
2587     sums = y + dof * i;
2588     for (j = 0; j < n; j++) {
2589       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2590       jrow++;
2591     }
2592   }
2593 
2594   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2595   PetscCall(VecRestoreArrayRead(xx, &x));
2596   PetscCall(VecRestoreArray(yy, &y));
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
2601   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2602   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2603   const PetscScalar *x, *v;
2604   PetscScalar       *y, *sums;
2605   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2606   PetscInt           n, i, jrow, j, dof = b->dof, k;
2607 
2608   PetscFunctionBegin;
2609   if (yy != zz) PetscCall(VecCopy(yy, zz));
2610   PetscCall(VecGetArrayRead(xx, &x));
2611   PetscCall(VecGetArray(zz, &y));
2612   idx = a->j;
2613   v   = a->a;
2614   ii  = a->i;
2615 
2616   for (i = 0; i < m; i++) {
2617     jrow = ii[i];
2618     n    = ii[i + 1] - jrow;
2619     sums = y + dof * i;
2620     for (j = 0; j < n; j++) {
2621       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2622       jrow++;
2623     }
2624   }
2625 
2626   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2627   PetscCall(VecRestoreArrayRead(xx, &x));
2628   PetscCall(VecRestoreArray(zz, &y));
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
2633   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2634   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2635   const PetscScalar *x, *v, *alpha;
2636   PetscScalar       *y;
2637   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2638   PetscInt           n, i, k;
2639 
2640   PetscFunctionBegin;
2641   PetscCall(VecGetArrayRead(xx, &x));
2642   PetscCall(VecSet(yy, 0.0));
2643   PetscCall(VecGetArray(yy, &y));
2644   for (i = 0; i < m; i++) {
2645     idx   = a->j + a->i[i];
2646     v     = a->a + a->i[i];
2647     n     = a->i[i + 1] - a->i[i];
2648     alpha = x + dof * i;
2649     while (n-- > 0) {
2650       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2651       idx++;
2652       v++;
2653     }
2654   }
2655   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2656   PetscCall(VecRestoreArrayRead(xx, &x));
2657   PetscCall(VecRestoreArray(yy, &y));
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
2662   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2663   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2664   const PetscScalar *x, *v, *alpha;
2665   PetscScalar       *y;
2666   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2667   PetscInt           n, i, k;
2668 
2669   PetscFunctionBegin;
2670   if (yy != zz) PetscCall(VecCopy(yy, zz));
2671   PetscCall(VecGetArrayRead(xx, &x));
2672   PetscCall(VecGetArray(zz, &y));
2673   for (i = 0; i < m; i++) {
2674     idx   = a->j + a->i[i];
2675     v     = a->a + a->i[i];
2676     n     = a->i[i + 1] - a->i[i];
2677     alpha = x + dof * i;
2678     while (n-- > 0) {
2679       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2680       idx++;
2681       v++;
2682     }
2683   }
2684   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2685   PetscCall(VecRestoreArrayRead(xx, &x));
2686   PetscCall(VecRestoreArray(zz, &y));
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 /*===================================================================================*/
2691 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
2692   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   /* start the scatter */
2696   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2697   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
2698   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2699   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
2704   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2705 
2706   PetscFunctionBegin;
2707   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
2708   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
2709   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
2710   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
2715   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   /* start the scatter */
2719   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2720   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
2721   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2722   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
2727   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2728 
2729   PetscFunctionBegin;
2730   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
2731   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
2732   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
2733   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 /* ----------------------------------------------------------------*/
2738 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) {
2739   Mat_Product *product = C->product;
2740 
2741   PetscFunctionBegin;
2742   if (product->type == MATPRODUCT_PtAP) {
2743     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2744   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) {
2749   Mat_Product *product = C->product;
2750   PetscBool    flg     = PETSC_FALSE;
2751   Mat          A = product->A, P = product->B;
2752   PetscInt     alg = 1; /* set default algorithm */
2753 #if !defined(PETSC_HAVE_HYPRE)
2754   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
2755   PetscInt    nalg        = 4;
2756 #else
2757   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
2758   PetscInt    nalg        = 5;
2759 #endif
2760 
2761   PetscFunctionBegin;
2762   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
2763 
2764   /* PtAP */
2765   /* Check matrix local sizes */
2766   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2767              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
2768   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2769              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
2770 
2771   /* Set the default algorithm */
2772   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
2773   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2774 
2775   /* Get runtime option */
2776   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
2777   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
2778   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2779   PetscOptionsEnd();
2780 
2781   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
2782   if (flg) {
2783     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2784     PetscFunctionReturn(0);
2785   }
2786 
2787   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
2788   if (flg) {
2789     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2790     PetscFunctionReturn(0);
2791   }
2792 
2793   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2794   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
2795   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
2796   PetscCall(MatProductSetFromOptions(C));
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 /* ----------------------------------------------------------------*/
2801 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) {
2802   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2803   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
2804   Mat                P  = pp->AIJ;
2805   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2806   PetscInt          *pti, *ptj, *ptJ;
2807   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2808   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2809   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
2810   MatScalar         *ca;
2811   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
2812 
2813   PetscFunctionBegin;
2814   /* Get ij structure of P^T */
2815   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
2816 
2817   cn = pn * ppdof;
2818   /* Allocate ci array, arrays for fill computation and */
2819   /* free space for accumulating nonzero column info */
2820   PetscCall(PetscMalloc1(cn + 1, &ci));
2821   ci[0] = 0;
2822 
2823   /* Work arrays for rows of P^T*A */
2824   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
2825   PetscCall(PetscArrayzero(ptadenserow, an));
2826   PetscCall(PetscArrayzero(denserow, cn));
2827 
2828   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2829   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2830   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2831   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
2832   current_space = free_space;
2833 
2834   /* Determine symbolic info for each row of C: */
2835   for (i = 0; i < pn; i++) {
2836     ptnzi = pti[i + 1] - pti[i];
2837     ptJ   = ptj + pti[i];
2838     for (dof = 0; dof < ppdof; dof++) {
2839       ptanzi = 0;
2840       /* Determine symbolic row of PtA: */
2841       for (j = 0; j < ptnzi; j++) {
2842         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2843         arow = ptJ[j] * ppdof + dof;
2844         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2845         anzj = ai[arow + 1] - ai[arow];
2846         ajj  = aj + ai[arow];
2847         for (k = 0; k < anzj; k++) {
2848           if (!ptadenserow[ajj[k]]) {
2849             ptadenserow[ajj[k]]    = -1;
2850             ptasparserow[ptanzi++] = ajj[k];
2851           }
2852         }
2853       }
2854       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2855       ptaj = ptasparserow;
2856       cnzi = 0;
2857       for (j = 0; j < ptanzi; j++) {
2858         /* Get offset within block of P */
2859         pshift = *ptaj % ppdof;
2860         /* Get block row of P */
2861         prow   = (*ptaj++) / ppdof; /* integer division */
2862         /* P has same number of nonzeros per row as the compressed form */
2863         pnzj   = pi[prow + 1] - pi[prow];
2864         pjj    = pj + pi[prow];
2865         for (k = 0; k < pnzj; k++) {
2866           /* Locations in C are shifted by the offset within the block */
2867           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2868           if (!denserow[pjj[k] * ppdof + pshift]) {
2869             denserow[pjj[k] * ppdof + pshift] = -1;
2870             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
2871           }
2872         }
2873       }
2874 
2875       /* sort sparserow */
2876       PetscCall(PetscSortInt(cnzi, sparserow));
2877 
2878       /* If free space is not available, make more free space */
2879       /* Double the amount of total space in the list */
2880       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
2881 
2882       /* Copy data into free space, and zero out denserows */
2883       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
2884 
2885       current_space->array += cnzi;
2886       current_space->local_used += cnzi;
2887       current_space->local_remaining -= cnzi;
2888 
2889       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2890       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
2891 
2892       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2893       /*        For now, we will recompute what is needed. */
2894       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
2895     }
2896   }
2897   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2898   /* Allocate space for cj, initialize cj, and */
2899   /* destroy list of free space and other temporary array(s) */
2900   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
2901   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
2902   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
2903 
2904   /* Allocate space for ca */
2905   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
2906 
2907   /* put together the new matrix */
2908   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
2909   PetscCall(MatSetBlockSize(C, pp->dof));
2910 
2911   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2912   /* Since these are PETSc arrays, change flags to free them as necessary. */
2913   c          = (Mat_SeqAIJ *)(C->data);
2914   c->free_a  = PETSC_TRUE;
2915   c->free_ij = PETSC_TRUE;
2916   c->nonew   = 0;
2917 
2918   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2919   C->ops->productnumeric = MatProductNumeric_PtAP;
2920 
2921   /* Clean up. */
2922   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) {
2927   /* This routine requires testing -- first draft only */
2928   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
2929   Mat              P  = pp->AIJ;
2930   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2931   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
2932   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
2933   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
2934   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
2935   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
2936   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
2937   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
2938   MatScalar       *ca = c->a, *caj, *apa;
2939 
2940   PetscFunctionBegin;
2941   /* Allocate temporary array for storage of one row of A*P */
2942   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
2943 
2944   /* Clear old values in C */
2945   PetscCall(PetscArrayzero(ca, ci[cm]));
2946 
2947   for (i = 0; i < am; i++) {
2948     /* Form sparse row of A*P */
2949     anzi  = ai[i + 1] - ai[i];
2950     apnzj = 0;
2951     for (j = 0; j < anzi; j++) {
2952       /* Get offset within block of P */
2953       pshift = *aj % ppdof;
2954       /* Get block row of P */
2955       prow   = *aj++ / ppdof; /* integer division */
2956       pnzj   = pi[prow + 1] - pi[prow];
2957       pjj    = pj + pi[prow];
2958       paj    = pa + pi[prow];
2959       for (k = 0; k < pnzj; k++) {
2960         poffset = pjj[k] * ppdof + pshift;
2961         if (!apjdense[poffset]) {
2962           apjdense[poffset] = -1;
2963           apj[apnzj++]      = poffset;
2964         }
2965         apa[poffset] += (*aa) * paj[k];
2966       }
2967       PetscCall(PetscLogFlops(2.0 * pnzj));
2968       aa++;
2969     }
2970 
2971     /* Sort the j index array for quick sparse axpy. */
2972     /* Note: a array does not need sorting as it is in dense storage locations. */
2973     PetscCall(PetscSortInt(apnzj, apj));
2974 
2975     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2976     prow    = i / ppdof; /* integer division */
2977     pshift  = i % ppdof;
2978     poffset = pi[prow];
2979     pnzi    = pi[prow + 1] - poffset;
2980     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2981     pJ      = pj + poffset;
2982     pA      = pa + poffset;
2983     for (j = 0; j < pnzi; j++) {
2984       crow = (*pJ) * ppdof + pshift;
2985       cjj  = cj + ci[crow];
2986       caj  = ca + ci[crow];
2987       pJ++;
2988       /* Perform sparse axpy operation.  Note cjj includes apj. */
2989       for (k = 0, nextap = 0; nextap < apnzj; k++) {
2990         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
2991       }
2992       PetscCall(PetscLogFlops(2.0 * apnzj));
2993       pA++;
2994     }
2995 
2996     /* Zero the current row info for A*P */
2997     for (j = 0; j < apnzj; j++) {
2998       apa[apj[j]]      = 0.;
2999       apjdense[apj[j]] = 0;
3000     }
3001   }
3002 
3003   /* Assemble the final matrix and clean up */
3004   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3005   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3006   PetscCall(PetscFree3(apa, apj, apjdense));
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) {
3011   Mat_Product *product = C->product;
3012   Mat          A = product->A, P = product->B;
3013 
3014   PetscFunctionBegin;
3015   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) {
3020   PetscFunctionBegin;
3021   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3022 }
3023 
3024 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) {
3025   PetscFunctionBegin;
3026   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3027 }
3028 
3029 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3030 
3031 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) {
3032   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3033 
3034   PetscFunctionBegin;
3035 
3036   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3041 
3042 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) {
3043   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3044 
3045   PetscFunctionBegin;
3046   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
3047   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3052 
3053 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) {
3054   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3055 
3056   PetscFunctionBegin;
3057 
3058   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3063 
3064 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) {
3065   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3066 
3067   PetscFunctionBegin;
3068 
3069   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
3070   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3071   PetscFunctionReturn(0);
3072 }
3073 
3074 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) {
3075   Mat_Product *product = C->product;
3076   Mat          A = product->A, P = product->B;
3077   PetscBool    flg;
3078 
3079   PetscFunctionBegin;
3080   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
3081   if (flg) {
3082     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
3083     C->ops->productnumeric = MatProductNumeric_PtAP;
3084     PetscFunctionReturn(0);
3085   }
3086 
3087   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
3088   if (flg) {
3089     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
3090     C->ops->productnumeric = MatProductNumeric_PtAP;
3091     PetscFunctionReturn(0);
3092   }
3093 
3094   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3095 }
3096 
3097 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
3098   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3099   Mat          a   = b->AIJ, B;
3100   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
3101   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3102   PetscInt    *cols;
3103   PetscScalar *vals;
3104 
3105   PetscFunctionBegin;
3106   PetscCall(MatGetSize(a, &m, &n));
3107   PetscCall(PetscMalloc1(dof * m, &ilen));
3108   for (i = 0; i < m; i++) {
3109     nmax = PetscMax(nmax, aij->ilen[i]);
3110     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
3111   }
3112   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
3113   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
3114   PetscCall(MatSetType(B, newtype));
3115   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
3116   PetscCall(PetscFree(ilen));
3117   PetscCall(PetscMalloc1(nmax, &icols));
3118   ii = 0;
3119   for (i = 0; i < m; i++) {
3120     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
3121     for (j = 0; j < dof; j++) {
3122       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
3123       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
3124       ii++;
3125     }
3126     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
3127   }
3128   PetscCall(PetscFree(icols));
3129   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3130   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3131 
3132   if (reuse == MAT_INPLACE_MATRIX) {
3133     PetscCall(MatHeaderReplace(A, &B));
3134   } else {
3135     *newmat = B;
3136   }
3137   PetscFunctionReturn(0);
3138 }
3139 
3140 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3141 
3142 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
3143   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3144   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
3145   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
3146   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
3147   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
3148   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
3149   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
3150   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
3151   PetscInt     rstart, cstart, *garray, ii, k;
3152   PetscScalar *vals, *ovals;
3153 
3154   PetscFunctionBegin;
3155   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
3156   for (i = 0; i < A->rmap->n / dof; i++) {
3157     nmax  = PetscMax(nmax, AIJ->ilen[i]);
3158     onmax = PetscMax(onmax, OAIJ->ilen[i]);
3159     for (j = 0; j < dof; j++) {
3160       dnz[dof * i + j] = AIJ->ilen[i];
3161       onz[dof * i + j] = OAIJ->ilen[i];
3162     }
3163   }
3164   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3165   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3166   PetscCall(MatSetType(B, newtype));
3167   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
3168   PetscCall(MatSetBlockSize(B, dof));
3169   PetscCall(PetscFree2(dnz, onz));
3170 
3171   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
3172   rstart = dof * maij->A->rmap->rstart;
3173   cstart = dof * maij->A->cmap->rstart;
3174   garray = mpiaij->garray;
3175 
3176   ii = rstart;
3177   for (i = 0; i < A->rmap->n / dof; i++) {
3178     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
3179     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
3180     for (j = 0; j < dof; j++) {
3181       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
3182       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
3183       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
3184       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
3185       ii++;
3186     }
3187     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
3188     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
3189   }
3190   PetscCall(PetscFree2(icols, oicols));
3191 
3192   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3193   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3194 
3195   if (reuse == MAT_INPLACE_MATRIX) {
3196     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3197     ((PetscObject)A)->refct = 1;
3198 
3199     PetscCall(MatHeaderReplace(A, &B));
3200 
3201     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3202   } else {
3203     *newmat = B;
3204   }
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
3209   Mat A;
3210 
3211   PetscFunctionBegin;
3212   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
3213   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
3214   PetscCall(MatDestroy(&A));
3215   PetscFunctionReturn(0);
3216 }
3217 
3218 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) {
3219   Mat A;
3220 
3221   PetscFunctionBegin;
3222   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
3223   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
3224   PetscCall(MatDestroy(&A));
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 /* ---------------------------------------------------------------------------------- */
3229 /*@
3230   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3231   operations for multicomponent problems.  It interpolates each component the same
3232   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
3233   and `MATMPIAIJ` for distributed matrices.
3234 
3235   Collective
3236 
3237   Input Parameters:
3238 + A - the `MATAIJ` matrix describing the action on blocks
3239 - dof - the block size (number of components per node)
3240 
3241   Output Parameter:
3242 . maij - the new `MATMAIJ` matrix
3243 
3244   Operations provided:
3245 .vb
3246     MatMult()
3247     MatMultTranspose()
3248     MatMultAdd()
3249     MatMultTransposeAdd()
3250     MatView()
3251 .ve
3252 
3253   Level: advanced
3254 
3255 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3256 @*/
3257 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) {
3258   PetscInt  n;
3259   Mat       B;
3260   PetscBool flg;
3261 #if defined(PETSC_HAVE_CUDA)
3262   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3263   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3264 #endif
3265 
3266   PetscFunctionBegin;
3267   dof = PetscAbs(dof);
3268   PetscCall(PetscObjectReference((PetscObject)A));
3269 
3270   if (dof == 1) *maij = A;
3271   else {
3272     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3273     /* propagate vec type */
3274     PetscCall(MatSetVecType(B, A->defaultvectype));
3275     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
3276     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
3277     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
3278     PetscCall(PetscLayoutSetUp(B->rmap));
3279     PetscCall(PetscLayoutSetUp(B->cmap));
3280 
3281     B->assembled = PETSC_TRUE;
3282 
3283     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
3284     if (flg) {
3285       Mat_SeqMAIJ *b;
3286 
3287       PetscCall(MatSetType(B, MATSEQMAIJ));
3288 
3289       B->ops->setup   = NULL;
3290       B->ops->destroy = MatDestroy_SeqMAIJ;
3291       B->ops->view    = MatView_SeqMAIJ;
3292 
3293       b      = (Mat_SeqMAIJ *)B->data;
3294       b->dof = dof;
3295       b->AIJ = A;
3296 
3297       if (dof == 2) {
3298         B->ops->mult             = MatMult_SeqMAIJ_2;
3299         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3300         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3301         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3302       } else if (dof == 3) {
3303         B->ops->mult             = MatMult_SeqMAIJ_3;
3304         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3305         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3306         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3307       } else if (dof == 4) {
3308         B->ops->mult             = MatMult_SeqMAIJ_4;
3309         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3310         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3311         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3312       } else if (dof == 5) {
3313         B->ops->mult             = MatMult_SeqMAIJ_5;
3314         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3315         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3316         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3317       } else if (dof == 6) {
3318         B->ops->mult             = MatMult_SeqMAIJ_6;
3319         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3320         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3321         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3322       } else if (dof == 7) {
3323         B->ops->mult             = MatMult_SeqMAIJ_7;
3324         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3325         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3326         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3327       } else if (dof == 8) {
3328         B->ops->mult             = MatMult_SeqMAIJ_8;
3329         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3330         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3331         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3332       } else if (dof == 9) {
3333         B->ops->mult             = MatMult_SeqMAIJ_9;
3334         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3335         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3336         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3337       } else if (dof == 10) {
3338         B->ops->mult             = MatMult_SeqMAIJ_10;
3339         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3340         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3341         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3342       } else if (dof == 11) {
3343         B->ops->mult             = MatMult_SeqMAIJ_11;
3344         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3345         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3346         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3347       } else if (dof == 16) {
3348         B->ops->mult             = MatMult_SeqMAIJ_16;
3349         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3350         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3351         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3352       } else if (dof == 18) {
3353         B->ops->mult             = MatMult_SeqMAIJ_18;
3354         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3355         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3356         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3357       } else {
3358         B->ops->mult             = MatMult_SeqMAIJ_N;
3359         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3360         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3361         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3362       }
3363 #if defined(PETSC_HAVE_CUDA)
3364       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
3365 #endif
3366       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
3367       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3368     } else {
3369       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3370       Mat_MPIMAIJ *b;
3371       IS           from, to;
3372       Vec          gvec;
3373 
3374       PetscCall(MatSetType(B, MATMPIMAIJ));
3375 
3376       B->ops->setup   = NULL;
3377       B->ops->destroy = MatDestroy_MPIMAIJ;
3378       B->ops->view    = MatView_MPIMAIJ;
3379 
3380       b      = (Mat_MPIMAIJ *)B->data;
3381       b->dof = dof;
3382       b->A   = A;
3383 
3384       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
3385       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
3386 
3387       PetscCall(VecGetSize(mpiaij->lvec, &n));
3388       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
3389       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
3390       PetscCall(VecSetBlockSize(b->w, dof));
3391       PetscCall(VecSetType(b->w, VECSEQ));
3392 
3393       /* create two temporary Index sets for build scatter gather */
3394       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
3395       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
3396 
3397       /* create temporary global vector to generate scatter context */
3398       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
3399 
3400       /* generate the scatter context */
3401       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
3402 
3403       PetscCall(ISDestroy(&from));
3404       PetscCall(ISDestroy(&to));
3405       PetscCall(VecDestroy(&gvec));
3406 
3407       B->ops->mult             = MatMult_MPIMAIJ_dof;
3408       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3409       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3410       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3411 
3412 #if defined(PETSC_HAVE_CUDA)
3413       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
3414 #endif
3415       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
3416       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3417     }
3418     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3419     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3420     PetscCall(MatSetUp(B));
3421 #if defined(PETSC_HAVE_CUDA)
3422     /* temporary until we have CUDA implementation of MAIJ */
3423     {
3424       PetscBool flg;
3425       if (convert) {
3426         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
3427         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
3428       }
3429     }
3430 #endif
3431     *maij = B;
3432   }
3433   PetscFunctionReturn(0);
3434 }
3435