xref: /petsc/src/mat/impls/maij/maij.c (revision 117ef88edefbfc12e7c19efe87a19a2e1b0acd4f)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: MatCreateMAIJ()
39 @*/
40 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41 {
42   PetscErrorCode ierr;
43   PetscBool      ismpimaij,isseqmaij;
44 
45   PetscFunctionBegin;
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48   if (ismpimaij) {
49     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50 
51     *B = b->A;
52   } else if (isseqmaij) {
53     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54 
55     *B = b->AIJ;
56   } else {
57     *B = A;
58   }
59   PetscFunctionReturn(0);
60 }
61 
62 /*@
63    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64 
65    Logically Collective
66 
67    Input Parameter:
68 +  A - the MAIJ matrix
69 -  dof - the block size for the new matrix
70 
71    Output Parameter:
72 .  B - the new MAIJ matrix
73 
74    Level: advanced
75 
76 .seealso: MatCreateMAIJ()
77 @*/
78 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79 {
80   PetscErrorCode ierr;
81   Mat            Aij = NULL;
82 
83   PetscFunctionBegin;
84   PetscValidLogicalCollectiveInt(A,dof,2);
85   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91 {
92   PetscErrorCode ierr;
93   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
94 
95   PetscFunctionBegin;
96   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97   ierr = PetscFree(A->data);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
99   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 PetscErrorCode MatSetUp_MAIJ(Mat A)
104 {
105   PetscFunctionBegin;
106   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107   PetscFunctionReturn(0);
108 }
109 
110 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
111 {
112   PetscErrorCode ierr;
113   Mat            B;
114 
115   PetscFunctionBegin;
116   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
117   ierr = MatView(B,viewer);CHKERRQ(ierr);
118   ierr = MatDestroy(&B);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
123 {
124   PetscErrorCode ierr;
125   Mat            B;
126 
127   PetscFunctionBegin;
128   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
129   ierr = MatView(B,viewer);CHKERRQ(ierr);
130   ierr = MatDestroy(&B);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
135 {
136   PetscErrorCode ierr;
137   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
138 
139   PetscFunctionBegin;
140   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
141   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
142   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
143   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
144   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
145   ierr = PetscFree(A->data);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
147   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
148   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*MC
153   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
154   multicomponent problems, interpolating or restricting each component the same way independently.
155   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
156 
157   Operations provided:
158 + MatMult
159 . MatMultTranspose
160 . MatMultAdd
161 - MatMultTransposeAdd
162 
163   Level: advanced
164 
165 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
166 M*/
167 
168 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
169 {
170   PetscErrorCode ierr;
171   Mat_MPIMAIJ    *b;
172   PetscMPIInt    size;
173 
174   PetscFunctionBegin;
175   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
176   A->data  = (void*)b;
177 
178   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
179 
180   A->ops->setup = MatSetUp_MAIJ;
181 
182   b->AIJ  = 0;
183   b->dof  = 0;
184   b->OAIJ = 0;
185   b->ctx  = 0;
186   b->w    = 0;
187   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
188   if (size == 1) {
189     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
190   } else {
191     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
192   }
193   A->preallocated  = PETSC_TRUE;
194   A->assembled     = PETSC_TRUE;
195   PetscFunctionReturn(0);
196 }
197 
198 /* --------------------------------------------------------------------------------------*/
199 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
200 {
201   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
202   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
203   const PetscScalar *x,*v;
204   PetscScalar       *y, sum1, sum2;
205   PetscErrorCode    ierr;
206   PetscInt          nonzerorow=0,n,i,jrow,j;
207   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
208 
209   PetscFunctionBegin;
210   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
211   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
212   idx  = a->j;
213   v    = a->a;
214   ii   = a->i;
215 
216   for (i=0; i<m; i++) {
217     jrow  = ii[i];
218     n     = ii[i+1] - jrow;
219     sum1  = 0.0;
220     sum2  = 0.0;
221 
222     nonzerorow += (n>0);
223     for (j=0; j<n; j++) {
224       sum1 += v[jrow]*x[2*idx[jrow]];
225       sum2 += v[jrow]*x[2*idx[jrow]+1];
226       jrow++;
227     }
228     y[2*i]   = sum1;
229     y[2*i+1] = sum2;
230   }
231 
232   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
233   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
234   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
238 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
239 {
240   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
241   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
242   const PetscScalar *x,*v;
243   PetscScalar       *y,alpha1,alpha2;
244   PetscErrorCode    ierr;
245   PetscInt          n,i;
246   const PetscInt    m = b->AIJ->rmap->n,*idx;
247 
248   PetscFunctionBegin;
249   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
250   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
251   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
252 
253   for (i=0; i<m; i++) {
254     idx    = a->j + a->i[i];
255     v      = a->a + a->i[i];
256     n      = a->i[i+1] - a->i[i];
257     alpha1 = x[2*i];
258     alpha2 = x[2*i+1];
259     while (n-->0) {
260       y[2*(*idx)]   += alpha1*(*v);
261       y[2*(*idx)+1] += alpha2*(*v);
262       idx++; v++;
263     }
264   }
265   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
266   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
267   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
272 {
273   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
274   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
275   const PetscScalar *x,*v;
276   PetscScalar       *y,sum1, sum2;
277   PetscErrorCode    ierr;
278   PetscInt          n,i,jrow,j;
279   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
280 
281   PetscFunctionBegin;
282   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
283   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
284   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
285   idx  = a->j;
286   v    = a->a;
287   ii   = a->i;
288 
289   for (i=0; i<m; i++) {
290     jrow = ii[i];
291     n    = ii[i+1] - jrow;
292     sum1 = 0.0;
293     sum2 = 0.0;
294     for (j=0; j<n; j++) {
295       sum1 += v[jrow]*x[2*idx[jrow]];
296       sum2 += v[jrow]*x[2*idx[jrow]+1];
297       jrow++;
298     }
299     y[2*i]   += sum1;
300     y[2*i+1] += sum2;
301   }
302 
303   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
304   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
305   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
309 {
310   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
311   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
312   const PetscScalar *x,*v;
313   PetscScalar       *y,alpha1,alpha2;
314   PetscErrorCode    ierr;
315   PetscInt          n,i;
316   const PetscInt    m = b->AIJ->rmap->n,*idx;
317 
318   PetscFunctionBegin;
319   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
320   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
321   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
322 
323   for (i=0; i<m; i++) {
324     idx    = a->j + a->i[i];
325     v      = a->a + a->i[i];
326     n      = a->i[i+1] - a->i[i];
327     alpha1 = x[2*i];
328     alpha2 = x[2*i+1];
329     while (n-->0) {
330       y[2*(*idx)]   += alpha1*(*v);
331       y[2*(*idx)+1] += alpha2*(*v);
332       idx++; v++;
333     }
334   }
335   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
336   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
337   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 /* --------------------------------------------------------------------------------------*/
341 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
342 {
343   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
344   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
345   const PetscScalar *x,*v;
346   PetscScalar       *y,sum1, sum2, sum3;
347   PetscErrorCode    ierr;
348   PetscInt          nonzerorow=0,n,i,jrow,j;
349   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
350 
351   PetscFunctionBegin;
352   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
353   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
354   idx  = a->j;
355   v    = a->a;
356   ii   = a->i;
357 
358   for (i=0; i<m; i++) {
359     jrow  = ii[i];
360     n     = ii[i+1] - jrow;
361     sum1  = 0.0;
362     sum2  = 0.0;
363     sum3  = 0.0;
364 
365     nonzerorow += (n>0);
366     for (j=0; j<n; j++) {
367       sum1 += v[jrow]*x[3*idx[jrow]];
368       sum2 += v[jrow]*x[3*idx[jrow]+1];
369       sum3 += v[jrow]*x[3*idx[jrow]+2];
370       jrow++;
371      }
372     y[3*i]   = sum1;
373     y[3*i+1] = sum2;
374     y[3*i+2] = sum3;
375   }
376 
377   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
378   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
379   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
384 {
385   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
386   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
387   const PetscScalar *x,*v;
388   PetscScalar       *y,alpha1,alpha2,alpha3;
389   PetscErrorCode    ierr;
390   PetscInt          n,i;
391   const PetscInt    m = b->AIJ->rmap->n,*idx;
392 
393   PetscFunctionBegin;
394   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
395   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
396   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
397 
398   for (i=0; i<m; i++) {
399     idx    = a->j + a->i[i];
400     v      = a->a + a->i[i];
401     n      = a->i[i+1] - a->i[i];
402     alpha1 = x[3*i];
403     alpha2 = x[3*i+1];
404     alpha3 = x[3*i+2];
405     while (n-->0) {
406       y[3*(*idx)]   += alpha1*(*v);
407       y[3*(*idx)+1] += alpha2*(*v);
408       y[3*(*idx)+2] += alpha3*(*v);
409       idx++; v++;
410     }
411   }
412   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
413   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
414   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
419 {
420   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
421   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
422   const PetscScalar *x,*v;
423   PetscScalar       *y,sum1, sum2, sum3;
424   PetscErrorCode    ierr;
425   PetscInt          n,i,jrow,j;
426   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
427 
428   PetscFunctionBegin;
429   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
430   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
431   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
432   idx  = a->j;
433   v    = a->a;
434   ii   = a->i;
435 
436   for (i=0; i<m; i++) {
437     jrow = ii[i];
438     n    = ii[i+1] - jrow;
439     sum1 = 0.0;
440     sum2 = 0.0;
441     sum3 = 0.0;
442     for (j=0; j<n; j++) {
443       sum1 += v[jrow]*x[3*idx[jrow]];
444       sum2 += v[jrow]*x[3*idx[jrow]+1];
445       sum3 += v[jrow]*x[3*idx[jrow]+2];
446       jrow++;
447     }
448     y[3*i]   += sum1;
449     y[3*i+1] += sum2;
450     y[3*i+2] += sum3;
451   }
452 
453   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
454   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
455   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
459 {
460   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
461   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
462   const PetscScalar *x,*v;
463   PetscScalar       *y,alpha1,alpha2,alpha3;
464   PetscErrorCode    ierr;
465   PetscInt          n,i;
466   const PetscInt    m = b->AIJ->rmap->n,*idx;
467 
468   PetscFunctionBegin;
469   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
470   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
472   for (i=0; i<m; i++) {
473     idx    = a->j + a->i[i];
474     v      = a->a + a->i[i];
475     n      = a->i[i+1] - a->i[i];
476     alpha1 = x[3*i];
477     alpha2 = x[3*i+1];
478     alpha3 = x[3*i+2];
479     while (n-->0) {
480       y[3*(*idx)]   += alpha1*(*v);
481       y[3*(*idx)+1] += alpha2*(*v);
482       y[3*(*idx)+2] += alpha3*(*v);
483       idx++; v++;
484     }
485   }
486   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
487   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
488   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 /* ------------------------------------------------------------------------------*/
493 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
494 {
495   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
496   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
497   const PetscScalar *x,*v;
498   PetscScalar       *y,sum1, sum2, sum3, sum4;
499   PetscErrorCode    ierr;
500   PetscInt          nonzerorow=0,n,i,jrow,j;
501   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
502 
503   PetscFunctionBegin;
504   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
505   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
506   idx  = a->j;
507   v    = a->a;
508   ii   = a->i;
509 
510   for (i=0; i<m; i++) {
511     jrow        = ii[i];
512     n           = ii[i+1] - jrow;
513     sum1        = 0.0;
514     sum2        = 0.0;
515     sum3        = 0.0;
516     sum4        = 0.0;
517     nonzerorow += (n>0);
518     for (j=0; j<n; j++) {
519       sum1 += v[jrow]*x[4*idx[jrow]];
520       sum2 += v[jrow]*x[4*idx[jrow]+1];
521       sum3 += v[jrow]*x[4*idx[jrow]+2];
522       sum4 += v[jrow]*x[4*idx[jrow]+3];
523       jrow++;
524     }
525     y[4*i]   = sum1;
526     y[4*i+1] = sum2;
527     y[4*i+2] = sum3;
528     y[4*i+3] = sum4;
529   }
530 
531   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
532   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
533   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 
537 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
538 {
539   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
540   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
541   const PetscScalar *x,*v;
542   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
543   PetscErrorCode    ierr;
544   PetscInt          n,i;
545   const PetscInt    m = b->AIJ->rmap->n,*idx;
546 
547   PetscFunctionBegin;
548   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
549   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
550   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
551   for (i=0; i<m; i++) {
552     idx    = a->j + a->i[i];
553     v      = a->a + a->i[i];
554     n      = a->i[i+1] - a->i[i];
555     alpha1 = x[4*i];
556     alpha2 = x[4*i+1];
557     alpha3 = x[4*i+2];
558     alpha4 = x[4*i+3];
559     while (n-->0) {
560       y[4*(*idx)]   += alpha1*(*v);
561       y[4*(*idx)+1] += alpha2*(*v);
562       y[4*(*idx)+2] += alpha3*(*v);
563       y[4*(*idx)+3] += alpha4*(*v);
564       idx++; v++;
565     }
566   }
567   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
569   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
574 {
575   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
576   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
577   const PetscScalar *x,*v;
578   PetscScalar       *y,sum1, sum2, sum3, sum4;
579   PetscErrorCode    ierr;
580   PetscInt          n,i,jrow,j;
581   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
582 
583   PetscFunctionBegin;
584   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
585   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
586   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
587   idx  = a->j;
588   v    = a->a;
589   ii   = a->i;
590 
591   for (i=0; i<m; i++) {
592     jrow = ii[i];
593     n    = ii[i+1] - jrow;
594     sum1 = 0.0;
595     sum2 = 0.0;
596     sum3 = 0.0;
597     sum4 = 0.0;
598     for (j=0; j<n; j++) {
599       sum1 += v[jrow]*x[4*idx[jrow]];
600       sum2 += v[jrow]*x[4*idx[jrow]+1];
601       sum3 += v[jrow]*x[4*idx[jrow]+2];
602       sum4 += v[jrow]*x[4*idx[jrow]+3];
603       jrow++;
604     }
605     y[4*i]   += sum1;
606     y[4*i+1] += sum2;
607     y[4*i+2] += sum3;
608     y[4*i+3] += sum4;
609   }
610 
611   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
612   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
613   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
617 {
618   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
619   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
620   const PetscScalar *x,*v;
621   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
622   PetscErrorCode    ierr;
623   PetscInt          n,i;
624   const PetscInt    m = b->AIJ->rmap->n,*idx;
625 
626   PetscFunctionBegin;
627   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
628   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
629   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
630 
631   for (i=0; i<m; i++) {
632     idx    = a->j + a->i[i];
633     v      = a->a + a->i[i];
634     n      = a->i[i+1] - a->i[i];
635     alpha1 = x[4*i];
636     alpha2 = x[4*i+1];
637     alpha3 = x[4*i+2];
638     alpha4 = x[4*i+3];
639     while (n-->0) {
640       y[4*(*idx)]   += alpha1*(*v);
641       y[4*(*idx)+1] += alpha2*(*v);
642       y[4*(*idx)+2] += alpha3*(*v);
643       y[4*(*idx)+3] += alpha4*(*v);
644       idx++; v++;
645     }
646   }
647   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
648   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
649   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 /* ------------------------------------------------------------------------------*/
653 
654 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
655 {
656   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658   const PetscScalar *x,*v;
659   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
660   PetscErrorCode    ierr;
661   PetscInt          nonzerorow=0,n,i,jrow,j;
662   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
663 
664   PetscFunctionBegin;
665   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
666   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
667   idx  = a->j;
668   v    = a->a;
669   ii   = a->i;
670 
671   for (i=0; i<m; i++) {
672     jrow  = ii[i];
673     n     = ii[i+1] - jrow;
674     sum1  = 0.0;
675     sum2  = 0.0;
676     sum3  = 0.0;
677     sum4  = 0.0;
678     sum5  = 0.0;
679 
680     nonzerorow += (n>0);
681     for (j=0; j<n; j++) {
682       sum1 += v[jrow]*x[5*idx[jrow]];
683       sum2 += v[jrow]*x[5*idx[jrow]+1];
684       sum3 += v[jrow]*x[5*idx[jrow]+2];
685       sum4 += v[jrow]*x[5*idx[jrow]+3];
686       sum5 += v[jrow]*x[5*idx[jrow]+4];
687       jrow++;
688     }
689     y[5*i]   = sum1;
690     y[5*i+1] = sum2;
691     y[5*i+2] = sum3;
692     y[5*i+3] = sum4;
693     y[5*i+4] = sum5;
694   }
695 
696   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
697   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
698   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 
702 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
703 {
704   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
705   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
706   const PetscScalar *x,*v;
707   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
708   PetscErrorCode    ierr;
709   PetscInt          n,i;
710   const PetscInt    m = b->AIJ->rmap->n,*idx;
711 
712   PetscFunctionBegin;
713   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
714   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
715   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
716 
717   for (i=0; i<m; i++) {
718     idx    = a->j + a->i[i];
719     v      = a->a + a->i[i];
720     n      = a->i[i+1] - a->i[i];
721     alpha1 = x[5*i];
722     alpha2 = x[5*i+1];
723     alpha3 = x[5*i+2];
724     alpha4 = x[5*i+3];
725     alpha5 = x[5*i+4];
726     while (n-->0) {
727       y[5*(*idx)]   += alpha1*(*v);
728       y[5*(*idx)+1] += alpha2*(*v);
729       y[5*(*idx)+2] += alpha3*(*v);
730       y[5*(*idx)+3] += alpha4*(*v);
731       y[5*(*idx)+4] += alpha5*(*v);
732       idx++; v++;
733     }
734   }
735   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
736   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
737   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
742 {
743   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
744   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
745   const PetscScalar *x,*v;
746   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
747   PetscErrorCode    ierr;
748   PetscInt          n,i,jrow,j;
749   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
750 
751   PetscFunctionBegin;
752   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
753   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
754   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
755   idx  = a->j;
756   v    = a->a;
757   ii   = a->i;
758 
759   for (i=0; i<m; i++) {
760     jrow = ii[i];
761     n    = ii[i+1] - jrow;
762     sum1 = 0.0;
763     sum2 = 0.0;
764     sum3 = 0.0;
765     sum4 = 0.0;
766     sum5 = 0.0;
767     for (j=0; j<n; j++) {
768       sum1 += v[jrow]*x[5*idx[jrow]];
769       sum2 += v[jrow]*x[5*idx[jrow]+1];
770       sum3 += v[jrow]*x[5*idx[jrow]+2];
771       sum4 += v[jrow]*x[5*idx[jrow]+3];
772       sum5 += v[jrow]*x[5*idx[jrow]+4];
773       jrow++;
774     }
775     y[5*i]   += sum1;
776     y[5*i+1] += sum2;
777     y[5*i+2] += sum3;
778     y[5*i+3] += sum4;
779     y[5*i+4] += sum5;
780   }
781 
782   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
783   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
784   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
789 {
790   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
791   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
792   const PetscScalar *x,*v;
793   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
794   PetscErrorCode    ierr;
795   PetscInt          n,i;
796   const PetscInt    m = b->AIJ->rmap->n,*idx;
797 
798   PetscFunctionBegin;
799   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
800   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
801   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
802 
803   for (i=0; i<m; i++) {
804     idx    = a->j + a->i[i];
805     v      = a->a + a->i[i];
806     n      = a->i[i+1] - a->i[i];
807     alpha1 = x[5*i];
808     alpha2 = x[5*i+1];
809     alpha3 = x[5*i+2];
810     alpha4 = x[5*i+3];
811     alpha5 = x[5*i+4];
812     while (n-->0) {
813       y[5*(*idx)]   += alpha1*(*v);
814       y[5*(*idx)+1] += alpha2*(*v);
815       y[5*(*idx)+2] += alpha3*(*v);
816       y[5*(*idx)+3] += alpha4*(*v);
817       y[5*(*idx)+4] += alpha5*(*v);
818       idx++; v++;
819     }
820   }
821   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
822   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
823   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 /* ------------------------------------------------------------------------------*/
828 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
829 {
830   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
831   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
832   const PetscScalar *x,*v;
833   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
834   PetscErrorCode    ierr;
835   PetscInt          nonzerorow=0,n,i,jrow,j;
836   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
837 
838   PetscFunctionBegin;
839   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
840   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
841   idx  = a->j;
842   v    = a->a;
843   ii   = a->i;
844 
845   for (i=0; i<m; i++) {
846     jrow  = ii[i];
847     n     = ii[i+1] - jrow;
848     sum1  = 0.0;
849     sum2  = 0.0;
850     sum3  = 0.0;
851     sum4  = 0.0;
852     sum5  = 0.0;
853     sum6  = 0.0;
854 
855     nonzerorow += (n>0);
856     for (j=0; j<n; j++) {
857       sum1 += v[jrow]*x[6*idx[jrow]];
858       sum2 += v[jrow]*x[6*idx[jrow]+1];
859       sum3 += v[jrow]*x[6*idx[jrow]+2];
860       sum4 += v[jrow]*x[6*idx[jrow]+3];
861       sum5 += v[jrow]*x[6*idx[jrow]+4];
862       sum6 += v[jrow]*x[6*idx[jrow]+5];
863       jrow++;
864     }
865     y[6*i]   = sum1;
866     y[6*i+1] = sum2;
867     y[6*i+2] = sum3;
868     y[6*i+3] = sum4;
869     y[6*i+4] = sum5;
870     y[6*i+5] = sum6;
871   }
872 
873   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
874   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
875   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
880 {
881   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
882   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
883   const PetscScalar *x,*v;
884   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
885   PetscErrorCode    ierr;
886   PetscInt          n,i;
887   const PetscInt    m = b->AIJ->rmap->n,*idx;
888 
889   PetscFunctionBegin;
890   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
891   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
892   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
893 
894   for (i=0; i<m; i++) {
895     idx    = a->j + a->i[i];
896     v      = a->a + a->i[i];
897     n      = a->i[i+1] - a->i[i];
898     alpha1 = x[6*i];
899     alpha2 = x[6*i+1];
900     alpha3 = x[6*i+2];
901     alpha4 = x[6*i+3];
902     alpha5 = x[6*i+4];
903     alpha6 = x[6*i+5];
904     while (n-->0) {
905       y[6*(*idx)]   += alpha1*(*v);
906       y[6*(*idx)+1] += alpha2*(*v);
907       y[6*(*idx)+2] += alpha3*(*v);
908       y[6*(*idx)+3] += alpha4*(*v);
909       y[6*(*idx)+4] += alpha5*(*v);
910       y[6*(*idx)+5] += alpha6*(*v);
911       idx++; v++;
912     }
913   }
914   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
915   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
916   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
921 {
922   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
923   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
924   const PetscScalar *x,*v;
925   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
926   PetscErrorCode    ierr;
927   PetscInt          n,i,jrow,j;
928   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
929 
930   PetscFunctionBegin;
931   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
932   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
933   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
934   idx  = a->j;
935   v    = a->a;
936   ii   = a->i;
937 
938   for (i=0; i<m; i++) {
939     jrow = ii[i];
940     n    = ii[i+1] - jrow;
941     sum1 = 0.0;
942     sum2 = 0.0;
943     sum3 = 0.0;
944     sum4 = 0.0;
945     sum5 = 0.0;
946     sum6 = 0.0;
947     for (j=0; j<n; j++) {
948       sum1 += v[jrow]*x[6*idx[jrow]];
949       sum2 += v[jrow]*x[6*idx[jrow]+1];
950       sum3 += v[jrow]*x[6*idx[jrow]+2];
951       sum4 += v[jrow]*x[6*idx[jrow]+3];
952       sum5 += v[jrow]*x[6*idx[jrow]+4];
953       sum6 += v[jrow]*x[6*idx[jrow]+5];
954       jrow++;
955     }
956     y[6*i]   += sum1;
957     y[6*i+1] += sum2;
958     y[6*i+2] += sum3;
959     y[6*i+3] += sum4;
960     y[6*i+4] += sum5;
961     y[6*i+5] += sum6;
962   }
963 
964   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
966   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
971 {
972   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
973   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
974   const PetscScalar *x,*v;
975   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
976   PetscErrorCode    ierr;
977   PetscInt          n,i;
978   const PetscInt    m = b->AIJ->rmap->n,*idx;
979 
980   PetscFunctionBegin;
981   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
982   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
984 
985   for (i=0; i<m; i++) {
986     idx    = a->j + a->i[i];
987     v      = a->a + a->i[i];
988     n      = a->i[i+1] - a->i[i];
989     alpha1 = x[6*i];
990     alpha2 = x[6*i+1];
991     alpha3 = x[6*i+2];
992     alpha4 = x[6*i+3];
993     alpha5 = x[6*i+4];
994     alpha6 = x[6*i+5];
995     while (n-->0) {
996       y[6*(*idx)]   += alpha1*(*v);
997       y[6*(*idx)+1] += alpha2*(*v);
998       y[6*(*idx)+2] += alpha3*(*v);
999       y[6*(*idx)+3] += alpha4*(*v);
1000       y[6*(*idx)+4] += alpha5*(*v);
1001       y[6*(*idx)+5] += alpha6*(*v);
1002       idx++; v++;
1003     }
1004   }
1005   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1006   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1007   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 /* ------------------------------------------------------------------------------*/
1012 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1013 {
1014   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1015   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1016   const PetscScalar *x,*v;
1017   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1018   PetscErrorCode    ierr;
1019   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1020   PetscInt          nonzerorow=0,n,i,jrow,j;
1021 
1022   PetscFunctionBegin;
1023   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1024   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1025   idx  = a->j;
1026   v    = a->a;
1027   ii   = a->i;
1028 
1029   for (i=0; i<m; i++) {
1030     jrow  = ii[i];
1031     n     = ii[i+1] - jrow;
1032     sum1  = 0.0;
1033     sum2  = 0.0;
1034     sum3  = 0.0;
1035     sum4  = 0.0;
1036     sum5  = 0.0;
1037     sum6  = 0.0;
1038     sum7  = 0.0;
1039 
1040     nonzerorow += (n>0);
1041     for (j=0; j<n; j++) {
1042       sum1 += v[jrow]*x[7*idx[jrow]];
1043       sum2 += v[jrow]*x[7*idx[jrow]+1];
1044       sum3 += v[jrow]*x[7*idx[jrow]+2];
1045       sum4 += v[jrow]*x[7*idx[jrow]+3];
1046       sum5 += v[jrow]*x[7*idx[jrow]+4];
1047       sum6 += v[jrow]*x[7*idx[jrow]+5];
1048       sum7 += v[jrow]*x[7*idx[jrow]+6];
1049       jrow++;
1050     }
1051     y[7*i]   = sum1;
1052     y[7*i+1] = sum2;
1053     y[7*i+2] = sum3;
1054     y[7*i+3] = sum4;
1055     y[7*i+4] = sum5;
1056     y[7*i+5] = sum6;
1057     y[7*i+6] = sum7;
1058   }
1059 
1060   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1061   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1062   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1067 {
1068   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1069   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1070   const PetscScalar *x,*v;
1071   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1072   PetscErrorCode    ierr;
1073   const PetscInt    m = b->AIJ->rmap->n,*idx;
1074   PetscInt          n,i;
1075 
1076   PetscFunctionBegin;
1077   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1078   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1079   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1080 
1081   for (i=0; i<m; i++) {
1082     idx    = a->j + a->i[i];
1083     v      = a->a + a->i[i];
1084     n      = a->i[i+1] - a->i[i];
1085     alpha1 = x[7*i];
1086     alpha2 = x[7*i+1];
1087     alpha3 = x[7*i+2];
1088     alpha4 = x[7*i+3];
1089     alpha5 = x[7*i+4];
1090     alpha6 = x[7*i+5];
1091     alpha7 = x[7*i+6];
1092     while (n-->0) {
1093       y[7*(*idx)]   += alpha1*(*v);
1094       y[7*(*idx)+1] += alpha2*(*v);
1095       y[7*(*idx)+2] += alpha3*(*v);
1096       y[7*(*idx)+3] += alpha4*(*v);
1097       y[7*(*idx)+4] += alpha5*(*v);
1098       y[7*(*idx)+5] += alpha6*(*v);
1099       y[7*(*idx)+6] += alpha7*(*v);
1100       idx++; v++;
1101     }
1102   }
1103   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1104   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1105   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1112   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1113   const PetscScalar *x,*v;
1114   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1115   PetscErrorCode    ierr;
1116   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1117   PetscInt          n,i,jrow,j;
1118 
1119   PetscFunctionBegin;
1120   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1121   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1122   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1123   idx  = a->j;
1124   v    = a->a;
1125   ii   = a->i;
1126 
1127   for (i=0; i<m; i++) {
1128     jrow = ii[i];
1129     n    = ii[i+1] - jrow;
1130     sum1 = 0.0;
1131     sum2 = 0.0;
1132     sum3 = 0.0;
1133     sum4 = 0.0;
1134     sum5 = 0.0;
1135     sum6 = 0.0;
1136     sum7 = 0.0;
1137     for (j=0; j<n; j++) {
1138       sum1 += v[jrow]*x[7*idx[jrow]];
1139       sum2 += v[jrow]*x[7*idx[jrow]+1];
1140       sum3 += v[jrow]*x[7*idx[jrow]+2];
1141       sum4 += v[jrow]*x[7*idx[jrow]+3];
1142       sum5 += v[jrow]*x[7*idx[jrow]+4];
1143       sum6 += v[jrow]*x[7*idx[jrow]+5];
1144       sum7 += v[jrow]*x[7*idx[jrow]+6];
1145       jrow++;
1146     }
1147     y[7*i]   += sum1;
1148     y[7*i+1] += sum2;
1149     y[7*i+2] += sum3;
1150     y[7*i+3] += sum4;
1151     y[7*i+4] += sum5;
1152     y[7*i+5] += sum6;
1153     y[7*i+6] += sum7;
1154   }
1155 
1156   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1157   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1158   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1163 {
1164   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1165   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1166   const PetscScalar *x,*v;
1167   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1168   PetscErrorCode    ierr;
1169   const PetscInt    m = b->AIJ->rmap->n,*idx;
1170   PetscInt          n,i;
1171 
1172   PetscFunctionBegin;
1173   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1174   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1175   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1176   for (i=0; i<m; i++) {
1177     idx    = a->j + a->i[i];
1178     v      = a->a + a->i[i];
1179     n      = a->i[i+1] - a->i[i];
1180     alpha1 = x[7*i];
1181     alpha2 = x[7*i+1];
1182     alpha3 = x[7*i+2];
1183     alpha4 = x[7*i+3];
1184     alpha5 = x[7*i+4];
1185     alpha6 = x[7*i+5];
1186     alpha7 = x[7*i+6];
1187     while (n-->0) {
1188       y[7*(*idx)]   += alpha1*(*v);
1189       y[7*(*idx)+1] += alpha2*(*v);
1190       y[7*(*idx)+2] += alpha3*(*v);
1191       y[7*(*idx)+3] += alpha4*(*v);
1192       y[7*(*idx)+4] += alpha5*(*v);
1193       y[7*(*idx)+5] += alpha6*(*v);
1194       y[7*(*idx)+6] += alpha7*(*v);
1195       idx++; v++;
1196     }
1197   }
1198   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1199   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1205 {
1206   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1207   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1208   const PetscScalar *x,*v;
1209   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1210   PetscErrorCode    ierr;
1211   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1212   PetscInt          nonzerorow=0,n,i,jrow,j;
1213 
1214   PetscFunctionBegin;
1215   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1216   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1217   idx  = a->j;
1218   v    = a->a;
1219   ii   = a->i;
1220 
1221   for (i=0; i<m; i++) {
1222     jrow  = ii[i];
1223     n     = ii[i+1] - jrow;
1224     sum1  = 0.0;
1225     sum2  = 0.0;
1226     sum3  = 0.0;
1227     sum4  = 0.0;
1228     sum5  = 0.0;
1229     sum6  = 0.0;
1230     sum7  = 0.0;
1231     sum8  = 0.0;
1232 
1233     nonzerorow += (n>0);
1234     for (j=0; j<n; j++) {
1235       sum1 += v[jrow]*x[8*idx[jrow]];
1236       sum2 += v[jrow]*x[8*idx[jrow]+1];
1237       sum3 += v[jrow]*x[8*idx[jrow]+2];
1238       sum4 += v[jrow]*x[8*idx[jrow]+3];
1239       sum5 += v[jrow]*x[8*idx[jrow]+4];
1240       sum6 += v[jrow]*x[8*idx[jrow]+5];
1241       sum7 += v[jrow]*x[8*idx[jrow]+6];
1242       sum8 += v[jrow]*x[8*idx[jrow]+7];
1243       jrow++;
1244     }
1245     y[8*i]   = sum1;
1246     y[8*i+1] = sum2;
1247     y[8*i+2] = sum3;
1248     y[8*i+3] = sum4;
1249     y[8*i+4] = sum5;
1250     y[8*i+5] = sum6;
1251     y[8*i+6] = sum7;
1252     y[8*i+7] = sum8;
1253   }
1254 
1255   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1256   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1257   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1262 {
1263   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1264   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1265   const PetscScalar *x,*v;
1266   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1267   PetscErrorCode    ierr;
1268   const PetscInt    m = b->AIJ->rmap->n,*idx;
1269   PetscInt          n,i;
1270 
1271   PetscFunctionBegin;
1272   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1273   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1274   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1275 
1276   for (i=0; i<m; i++) {
1277     idx    = a->j + a->i[i];
1278     v      = a->a + a->i[i];
1279     n      = a->i[i+1] - a->i[i];
1280     alpha1 = x[8*i];
1281     alpha2 = x[8*i+1];
1282     alpha3 = x[8*i+2];
1283     alpha4 = x[8*i+3];
1284     alpha5 = x[8*i+4];
1285     alpha6 = x[8*i+5];
1286     alpha7 = x[8*i+6];
1287     alpha8 = x[8*i+7];
1288     while (n-->0) {
1289       y[8*(*idx)]   += alpha1*(*v);
1290       y[8*(*idx)+1] += alpha2*(*v);
1291       y[8*(*idx)+2] += alpha3*(*v);
1292       y[8*(*idx)+3] += alpha4*(*v);
1293       y[8*(*idx)+4] += alpha5*(*v);
1294       y[8*(*idx)+5] += alpha6*(*v);
1295       y[8*(*idx)+6] += alpha7*(*v);
1296       y[8*(*idx)+7] += alpha8*(*v);
1297       idx++; v++;
1298     }
1299   }
1300   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1301   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1302   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1307 {
1308   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1309   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1310   const PetscScalar *x,*v;
1311   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1312   PetscErrorCode    ierr;
1313   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1314   PetscInt          n,i,jrow,j;
1315 
1316   PetscFunctionBegin;
1317   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1318   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1319   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1320   idx  = a->j;
1321   v    = a->a;
1322   ii   = a->i;
1323 
1324   for (i=0; i<m; i++) {
1325     jrow = ii[i];
1326     n    = ii[i+1] - jrow;
1327     sum1 = 0.0;
1328     sum2 = 0.0;
1329     sum3 = 0.0;
1330     sum4 = 0.0;
1331     sum5 = 0.0;
1332     sum6 = 0.0;
1333     sum7 = 0.0;
1334     sum8 = 0.0;
1335     for (j=0; j<n; j++) {
1336       sum1 += v[jrow]*x[8*idx[jrow]];
1337       sum2 += v[jrow]*x[8*idx[jrow]+1];
1338       sum3 += v[jrow]*x[8*idx[jrow]+2];
1339       sum4 += v[jrow]*x[8*idx[jrow]+3];
1340       sum5 += v[jrow]*x[8*idx[jrow]+4];
1341       sum6 += v[jrow]*x[8*idx[jrow]+5];
1342       sum7 += v[jrow]*x[8*idx[jrow]+6];
1343       sum8 += v[jrow]*x[8*idx[jrow]+7];
1344       jrow++;
1345     }
1346     y[8*i]   += sum1;
1347     y[8*i+1] += sum2;
1348     y[8*i+2] += sum3;
1349     y[8*i+3] += sum4;
1350     y[8*i+4] += sum5;
1351     y[8*i+5] += sum6;
1352     y[8*i+6] += sum7;
1353     y[8*i+7] += sum8;
1354   }
1355 
1356   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1357   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1358   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1363 {
1364   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1365   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1366   const PetscScalar *x,*v;
1367   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1368   PetscErrorCode    ierr;
1369   const PetscInt    m = b->AIJ->rmap->n,*idx;
1370   PetscInt          n,i;
1371 
1372   PetscFunctionBegin;
1373   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1374   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1375   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1376   for (i=0; i<m; i++) {
1377     idx    = a->j + a->i[i];
1378     v      = a->a + a->i[i];
1379     n      = a->i[i+1] - a->i[i];
1380     alpha1 = x[8*i];
1381     alpha2 = x[8*i+1];
1382     alpha3 = x[8*i+2];
1383     alpha4 = x[8*i+3];
1384     alpha5 = x[8*i+4];
1385     alpha6 = x[8*i+5];
1386     alpha7 = x[8*i+6];
1387     alpha8 = x[8*i+7];
1388     while (n-->0) {
1389       y[8*(*idx)]   += alpha1*(*v);
1390       y[8*(*idx)+1] += alpha2*(*v);
1391       y[8*(*idx)+2] += alpha3*(*v);
1392       y[8*(*idx)+3] += alpha4*(*v);
1393       y[8*(*idx)+4] += alpha5*(*v);
1394       y[8*(*idx)+5] += alpha6*(*v);
1395       y[8*(*idx)+6] += alpha7*(*v);
1396       y[8*(*idx)+7] += alpha8*(*v);
1397       idx++; v++;
1398     }
1399   }
1400   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1401   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1402   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /* ------------------------------------------------------------------------------*/
1407 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1408 {
1409   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1410   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1411   const PetscScalar *x,*v;
1412   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1413   PetscErrorCode    ierr;
1414   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1415   PetscInt          nonzerorow=0,n,i,jrow,j;
1416 
1417   PetscFunctionBegin;
1418   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1419   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1420   idx  = a->j;
1421   v    = a->a;
1422   ii   = a->i;
1423 
1424   for (i=0; i<m; i++) {
1425     jrow  = ii[i];
1426     n     = ii[i+1] - jrow;
1427     sum1  = 0.0;
1428     sum2  = 0.0;
1429     sum3  = 0.0;
1430     sum4  = 0.0;
1431     sum5  = 0.0;
1432     sum6  = 0.0;
1433     sum7  = 0.0;
1434     sum8  = 0.0;
1435     sum9  = 0.0;
1436 
1437     nonzerorow += (n>0);
1438     for (j=0; j<n; j++) {
1439       sum1 += v[jrow]*x[9*idx[jrow]];
1440       sum2 += v[jrow]*x[9*idx[jrow]+1];
1441       sum3 += v[jrow]*x[9*idx[jrow]+2];
1442       sum4 += v[jrow]*x[9*idx[jrow]+3];
1443       sum5 += v[jrow]*x[9*idx[jrow]+4];
1444       sum6 += v[jrow]*x[9*idx[jrow]+5];
1445       sum7 += v[jrow]*x[9*idx[jrow]+6];
1446       sum8 += v[jrow]*x[9*idx[jrow]+7];
1447       sum9 += v[jrow]*x[9*idx[jrow]+8];
1448       jrow++;
1449     }
1450     y[9*i]   = sum1;
1451     y[9*i+1] = sum2;
1452     y[9*i+2] = sum3;
1453     y[9*i+3] = sum4;
1454     y[9*i+4] = sum5;
1455     y[9*i+5] = sum6;
1456     y[9*i+6] = sum7;
1457     y[9*i+7] = sum8;
1458     y[9*i+8] = sum9;
1459   }
1460 
1461   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1462   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1463   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 /* ------------------------------------------------------------------------------*/
1468 
1469 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1470 {
1471   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1472   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1473   const PetscScalar *x,*v;
1474   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1475   PetscErrorCode    ierr;
1476   const PetscInt    m = b->AIJ->rmap->n,*idx;
1477   PetscInt          n,i;
1478 
1479   PetscFunctionBegin;
1480   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1481   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1482   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1483 
1484   for (i=0; i<m; i++) {
1485     idx    = a->j + a->i[i];
1486     v      = a->a + a->i[i];
1487     n      = a->i[i+1] - a->i[i];
1488     alpha1 = x[9*i];
1489     alpha2 = x[9*i+1];
1490     alpha3 = x[9*i+2];
1491     alpha4 = x[9*i+3];
1492     alpha5 = x[9*i+4];
1493     alpha6 = x[9*i+5];
1494     alpha7 = x[9*i+6];
1495     alpha8 = x[9*i+7];
1496     alpha9 = x[9*i+8];
1497     while (n-->0) {
1498       y[9*(*idx)]   += alpha1*(*v);
1499       y[9*(*idx)+1] += alpha2*(*v);
1500       y[9*(*idx)+2] += alpha3*(*v);
1501       y[9*(*idx)+3] += alpha4*(*v);
1502       y[9*(*idx)+4] += alpha5*(*v);
1503       y[9*(*idx)+5] += alpha6*(*v);
1504       y[9*(*idx)+6] += alpha7*(*v);
1505       y[9*(*idx)+7] += alpha8*(*v);
1506       y[9*(*idx)+8] += alpha9*(*v);
1507       idx++; v++;
1508     }
1509   }
1510   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1511   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1512   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1517 {
1518   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1519   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1520   const PetscScalar *x,*v;
1521   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1522   PetscErrorCode    ierr;
1523   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1524   PetscInt          n,i,jrow,j;
1525 
1526   PetscFunctionBegin;
1527   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1528   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1529   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1530   idx  = a->j;
1531   v    = a->a;
1532   ii   = a->i;
1533 
1534   for (i=0; i<m; i++) {
1535     jrow = ii[i];
1536     n    = ii[i+1] - jrow;
1537     sum1 = 0.0;
1538     sum2 = 0.0;
1539     sum3 = 0.0;
1540     sum4 = 0.0;
1541     sum5 = 0.0;
1542     sum6 = 0.0;
1543     sum7 = 0.0;
1544     sum8 = 0.0;
1545     sum9 = 0.0;
1546     for (j=0; j<n; j++) {
1547       sum1 += v[jrow]*x[9*idx[jrow]];
1548       sum2 += v[jrow]*x[9*idx[jrow]+1];
1549       sum3 += v[jrow]*x[9*idx[jrow]+2];
1550       sum4 += v[jrow]*x[9*idx[jrow]+3];
1551       sum5 += v[jrow]*x[9*idx[jrow]+4];
1552       sum6 += v[jrow]*x[9*idx[jrow]+5];
1553       sum7 += v[jrow]*x[9*idx[jrow]+6];
1554       sum8 += v[jrow]*x[9*idx[jrow]+7];
1555       sum9 += v[jrow]*x[9*idx[jrow]+8];
1556       jrow++;
1557     }
1558     y[9*i]   += sum1;
1559     y[9*i+1] += sum2;
1560     y[9*i+2] += sum3;
1561     y[9*i+3] += sum4;
1562     y[9*i+4] += sum5;
1563     y[9*i+5] += sum6;
1564     y[9*i+6] += sum7;
1565     y[9*i+7] += sum8;
1566     y[9*i+8] += sum9;
1567   }
1568 
1569   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1570   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1571   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1576 {
1577   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1578   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1579   const PetscScalar *x,*v;
1580   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1581   PetscErrorCode    ierr;
1582   const PetscInt    m = b->AIJ->rmap->n,*idx;
1583   PetscInt          n,i;
1584 
1585   PetscFunctionBegin;
1586   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1587   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1588   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1589   for (i=0; i<m; i++) {
1590     idx    = a->j + a->i[i];
1591     v      = a->a + a->i[i];
1592     n      = a->i[i+1] - a->i[i];
1593     alpha1 = x[9*i];
1594     alpha2 = x[9*i+1];
1595     alpha3 = x[9*i+2];
1596     alpha4 = x[9*i+3];
1597     alpha5 = x[9*i+4];
1598     alpha6 = x[9*i+5];
1599     alpha7 = x[9*i+6];
1600     alpha8 = x[9*i+7];
1601     alpha9 = x[9*i+8];
1602     while (n-->0) {
1603       y[9*(*idx)]   += alpha1*(*v);
1604       y[9*(*idx)+1] += alpha2*(*v);
1605       y[9*(*idx)+2] += alpha3*(*v);
1606       y[9*(*idx)+3] += alpha4*(*v);
1607       y[9*(*idx)+4] += alpha5*(*v);
1608       y[9*(*idx)+5] += alpha6*(*v);
1609       y[9*(*idx)+6] += alpha7*(*v);
1610       y[9*(*idx)+7] += alpha8*(*v);
1611       y[9*(*idx)+8] += alpha9*(*v);
1612       idx++; v++;
1613     }
1614   }
1615   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1616   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1617   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1621 {
1622   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1623   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1624   const PetscScalar *x,*v;
1625   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626   PetscErrorCode    ierr;
1627   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1628   PetscInt          nonzerorow=0,n,i,jrow,j;
1629 
1630   PetscFunctionBegin;
1631   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1632   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1633   idx  = a->j;
1634   v    = a->a;
1635   ii   = a->i;
1636 
1637   for (i=0; i<m; i++) {
1638     jrow  = ii[i];
1639     n     = ii[i+1] - jrow;
1640     sum1  = 0.0;
1641     sum2  = 0.0;
1642     sum3  = 0.0;
1643     sum4  = 0.0;
1644     sum5  = 0.0;
1645     sum6  = 0.0;
1646     sum7  = 0.0;
1647     sum8  = 0.0;
1648     sum9  = 0.0;
1649     sum10 = 0.0;
1650 
1651     nonzerorow += (n>0);
1652     for (j=0; j<n; j++) {
1653       sum1  += v[jrow]*x[10*idx[jrow]];
1654       sum2  += v[jrow]*x[10*idx[jrow]+1];
1655       sum3  += v[jrow]*x[10*idx[jrow]+2];
1656       sum4  += v[jrow]*x[10*idx[jrow]+3];
1657       sum5  += v[jrow]*x[10*idx[jrow]+4];
1658       sum6  += v[jrow]*x[10*idx[jrow]+5];
1659       sum7  += v[jrow]*x[10*idx[jrow]+6];
1660       sum8  += v[jrow]*x[10*idx[jrow]+7];
1661       sum9  += v[jrow]*x[10*idx[jrow]+8];
1662       sum10 += v[jrow]*x[10*idx[jrow]+9];
1663       jrow++;
1664     }
1665     y[10*i]   = sum1;
1666     y[10*i+1] = sum2;
1667     y[10*i+2] = sum3;
1668     y[10*i+3] = sum4;
1669     y[10*i+4] = sum5;
1670     y[10*i+5] = sum6;
1671     y[10*i+6] = sum7;
1672     y[10*i+7] = sum8;
1673     y[10*i+8] = sum9;
1674     y[10*i+9] = sum10;
1675   }
1676 
1677   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1678   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1679   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1684 {
1685   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1686   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1687   const PetscScalar *x,*v;
1688   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689   PetscErrorCode    ierr;
1690   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1691   PetscInt          n,i,jrow,j;
1692 
1693   PetscFunctionBegin;
1694   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1695   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1696   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1697   idx  = a->j;
1698   v    = a->a;
1699   ii   = a->i;
1700 
1701   for (i=0; i<m; i++) {
1702     jrow  = ii[i];
1703     n     = ii[i+1] - jrow;
1704     sum1  = 0.0;
1705     sum2  = 0.0;
1706     sum3  = 0.0;
1707     sum4  = 0.0;
1708     sum5  = 0.0;
1709     sum6  = 0.0;
1710     sum7  = 0.0;
1711     sum8  = 0.0;
1712     sum9  = 0.0;
1713     sum10 = 0.0;
1714     for (j=0; j<n; j++) {
1715       sum1  += v[jrow]*x[10*idx[jrow]];
1716       sum2  += v[jrow]*x[10*idx[jrow]+1];
1717       sum3  += v[jrow]*x[10*idx[jrow]+2];
1718       sum4  += v[jrow]*x[10*idx[jrow]+3];
1719       sum5  += v[jrow]*x[10*idx[jrow]+4];
1720       sum6  += v[jrow]*x[10*idx[jrow]+5];
1721       sum7  += v[jrow]*x[10*idx[jrow]+6];
1722       sum8  += v[jrow]*x[10*idx[jrow]+7];
1723       sum9  += v[jrow]*x[10*idx[jrow]+8];
1724       sum10 += v[jrow]*x[10*idx[jrow]+9];
1725       jrow++;
1726     }
1727     y[10*i]   += sum1;
1728     y[10*i+1] += sum2;
1729     y[10*i+2] += sum3;
1730     y[10*i+3] += sum4;
1731     y[10*i+4] += sum5;
1732     y[10*i+5] += sum6;
1733     y[10*i+6] += sum7;
1734     y[10*i+7] += sum8;
1735     y[10*i+8] += sum9;
1736     y[10*i+9] += sum10;
1737   }
1738 
1739   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1740   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1741   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1746 {
1747   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749   const PetscScalar *x,*v;
1750   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1751   PetscErrorCode    ierr;
1752   const PetscInt    m = b->AIJ->rmap->n,*idx;
1753   PetscInt          n,i;
1754 
1755   PetscFunctionBegin;
1756   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1757   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1758   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1759 
1760   for (i=0; i<m; i++) {
1761     idx     = a->j + a->i[i];
1762     v       = a->a + a->i[i];
1763     n       = a->i[i+1] - a->i[i];
1764     alpha1  = x[10*i];
1765     alpha2  = x[10*i+1];
1766     alpha3  = x[10*i+2];
1767     alpha4  = x[10*i+3];
1768     alpha5  = x[10*i+4];
1769     alpha6  = x[10*i+5];
1770     alpha7  = x[10*i+6];
1771     alpha8  = x[10*i+7];
1772     alpha9  = x[10*i+8];
1773     alpha10 = x[10*i+9];
1774     while (n-->0) {
1775       y[10*(*idx)]   += alpha1*(*v);
1776       y[10*(*idx)+1] += alpha2*(*v);
1777       y[10*(*idx)+2] += alpha3*(*v);
1778       y[10*(*idx)+3] += alpha4*(*v);
1779       y[10*(*idx)+4] += alpha5*(*v);
1780       y[10*(*idx)+5] += alpha6*(*v);
1781       y[10*(*idx)+6] += alpha7*(*v);
1782       y[10*(*idx)+7] += alpha8*(*v);
1783       y[10*(*idx)+8] += alpha9*(*v);
1784       y[10*(*idx)+9] += alpha10*(*v);
1785       idx++; v++;
1786     }
1787   }
1788   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1789   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1790   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1795 {
1796   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1797   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1798   const PetscScalar *x,*v;
1799   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1800   PetscErrorCode    ierr;
1801   const PetscInt    m = b->AIJ->rmap->n,*idx;
1802   PetscInt          n,i;
1803 
1804   PetscFunctionBegin;
1805   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1806   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1807   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1808   for (i=0; i<m; i++) {
1809     idx     = a->j + a->i[i];
1810     v       = a->a + a->i[i];
1811     n       = a->i[i+1] - a->i[i];
1812     alpha1  = x[10*i];
1813     alpha2  = x[10*i+1];
1814     alpha3  = x[10*i+2];
1815     alpha4  = x[10*i+3];
1816     alpha5  = x[10*i+4];
1817     alpha6  = x[10*i+5];
1818     alpha7  = x[10*i+6];
1819     alpha8  = x[10*i+7];
1820     alpha9  = x[10*i+8];
1821     alpha10 = x[10*i+9];
1822     while (n-->0) {
1823       y[10*(*idx)]   += alpha1*(*v);
1824       y[10*(*idx)+1] += alpha2*(*v);
1825       y[10*(*idx)+2] += alpha3*(*v);
1826       y[10*(*idx)+3] += alpha4*(*v);
1827       y[10*(*idx)+4] += alpha5*(*v);
1828       y[10*(*idx)+5] += alpha6*(*v);
1829       y[10*(*idx)+6] += alpha7*(*v);
1830       y[10*(*idx)+7] += alpha8*(*v);
1831       y[10*(*idx)+8] += alpha9*(*v);
1832       y[10*(*idx)+9] += alpha10*(*v);
1833       idx++; v++;
1834     }
1835   }
1836   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1837   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1838   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 
1843 /*--------------------------------------------------------------------------------------------*/
1844 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1845 {
1846   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1847   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1848   const PetscScalar *x,*v;
1849   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1850   PetscErrorCode    ierr;
1851   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1852   PetscInt          nonzerorow=0,n,i,jrow,j;
1853 
1854   PetscFunctionBegin;
1855   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1856   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1857   idx  = a->j;
1858   v    = a->a;
1859   ii   = a->i;
1860 
1861   for (i=0; i<m; i++) {
1862     jrow  = ii[i];
1863     n     = ii[i+1] - jrow;
1864     sum1  = 0.0;
1865     sum2  = 0.0;
1866     sum3  = 0.0;
1867     sum4  = 0.0;
1868     sum5  = 0.0;
1869     sum6  = 0.0;
1870     sum7  = 0.0;
1871     sum8  = 0.0;
1872     sum9  = 0.0;
1873     sum10 = 0.0;
1874     sum11 = 0.0;
1875 
1876     nonzerorow += (n>0);
1877     for (j=0; j<n; j++) {
1878       sum1  += v[jrow]*x[11*idx[jrow]];
1879       sum2  += v[jrow]*x[11*idx[jrow]+1];
1880       sum3  += v[jrow]*x[11*idx[jrow]+2];
1881       sum4  += v[jrow]*x[11*idx[jrow]+3];
1882       sum5  += v[jrow]*x[11*idx[jrow]+4];
1883       sum6  += v[jrow]*x[11*idx[jrow]+5];
1884       sum7  += v[jrow]*x[11*idx[jrow]+6];
1885       sum8  += v[jrow]*x[11*idx[jrow]+7];
1886       sum9  += v[jrow]*x[11*idx[jrow]+8];
1887       sum10 += v[jrow]*x[11*idx[jrow]+9];
1888       sum11 += v[jrow]*x[11*idx[jrow]+10];
1889       jrow++;
1890     }
1891     y[11*i]    = sum1;
1892     y[11*i+1]  = sum2;
1893     y[11*i+2]  = sum3;
1894     y[11*i+3]  = sum4;
1895     y[11*i+4]  = sum5;
1896     y[11*i+5]  = sum6;
1897     y[11*i+6]  = sum7;
1898     y[11*i+7]  = sum8;
1899     y[11*i+8]  = sum9;
1900     y[11*i+9]  = sum10;
1901     y[11*i+10] = sum11;
1902   }
1903 
1904   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1905   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1906   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1911 {
1912   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1913   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1914   const PetscScalar *x,*v;
1915   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1916   PetscErrorCode    ierr;
1917   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1918   PetscInt          n,i,jrow,j;
1919 
1920   PetscFunctionBegin;
1921   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1922   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1923   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1924   idx  = a->j;
1925   v    = a->a;
1926   ii   = a->i;
1927 
1928   for (i=0; i<m; i++) {
1929     jrow  = ii[i];
1930     n     = ii[i+1] - jrow;
1931     sum1  = 0.0;
1932     sum2  = 0.0;
1933     sum3  = 0.0;
1934     sum4  = 0.0;
1935     sum5  = 0.0;
1936     sum6  = 0.0;
1937     sum7  = 0.0;
1938     sum8  = 0.0;
1939     sum9  = 0.0;
1940     sum10 = 0.0;
1941     sum11 = 0.0;
1942     for (j=0; j<n; j++) {
1943       sum1  += v[jrow]*x[11*idx[jrow]];
1944       sum2  += v[jrow]*x[11*idx[jrow]+1];
1945       sum3  += v[jrow]*x[11*idx[jrow]+2];
1946       sum4  += v[jrow]*x[11*idx[jrow]+3];
1947       sum5  += v[jrow]*x[11*idx[jrow]+4];
1948       sum6  += v[jrow]*x[11*idx[jrow]+5];
1949       sum7  += v[jrow]*x[11*idx[jrow]+6];
1950       sum8  += v[jrow]*x[11*idx[jrow]+7];
1951       sum9  += v[jrow]*x[11*idx[jrow]+8];
1952       sum10 += v[jrow]*x[11*idx[jrow]+9];
1953       sum11 += v[jrow]*x[11*idx[jrow]+10];
1954       jrow++;
1955     }
1956     y[11*i]    += sum1;
1957     y[11*i+1]  += sum2;
1958     y[11*i+2]  += sum3;
1959     y[11*i+3]  += sum4;
1960     y[11*i+4]  += sum5;
1961     y[11*i+5]  += sum6;
1962     y[11*i+6]  += sum7;
1963     y[11*i+7]  += sum8;
1964     y[11*i+8]  += sum9;
1965     y[11*i+9]  += sum10;
1966     y[11*i+10] += sum11;
1967   }
1968 
1969   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
1970   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1971   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1976 {
1977   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1979   const PetscScalar *x,*v;
1980   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1981   PetscErrorCode    ierr;
1982   const PetscInt    m = b->AIJ->rmap->n,*idx;
1983   PetscInt          n,i;
1984 
1985   PetscFunctionBegin;
1986   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1987   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1988   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1989 
1990   for (i=0; i<m; i++) {
1991     idx     = a->j + a->i[i];
1992     v       = a->a + a->i[i];
1993     n       = a->i[i+1] - a->i[i];
1994     alpha1  = x[11*i];
1995     alpha2  = x[11*i+1];
1996     alpha3  = x[11*i+2];
1997     alpha4  = x[11*i+3];
1998     alpha5  = x[11*i+4];
1999     alpha6  = x[11*i+5];
2000     alpha7  = x[11*i+6];
2001     alpha8  = x[11*i+7];
2002     alpha9  = x[11*i+8];
2003     alpha10 = x[11*i+9];
2004     alpha11 = x[11*i+10];
2005     while (n-->0) {
2006       y[11*(*idx)]    += alpha1*(*v);
2007       y[11*(*idx)+1]  += alpha2*(*v);
2008       y[11*(*idx)+2]  += alpha3*(*v);
2009       y[11*(*idx)+3]  += alpha4*(*v);
2010       y[11*(*idx)+4]  += alpha5*(*v);
2011       y[11*(*idx)+5]  += alpha6*(*v);
2012       y[11*(*idx)+6]  += alpha7*(*v);
2013       y[11*(*idx)+7]  += alpha8*(*v);
2014       y[11*(*idx)+8]  += alpha9*(*v);
2015       y[11*(*idx)+9]  += alpha10*(*v);
2016       y[11*(*idx)+10] += alpha11*(*v);
2017       idx++; v++;
2018     }
2019   }
2020   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2021   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2022   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2027 {
2028   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2029   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2030   const PetscScalar *x,*v;
2031   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2032   PetscErrorCode    ierr;
2033   const PetscInt    m = b->AIJ->rmap->n,*idx;
2034   PetscInt          n,i;
2035 
2036   PetscFunctionBegin;
2037   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2038   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2039   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2040   for (i=0; i<m; i++) {
2041     idx     = a->j + a->i[i];
2042     v       = a->a + a->i[i];
2043     n       = a->i[i+1] - a->i[i];
2044     alpha1  = x[11*i];
2045     alpha2  = x[11*i+1];
2046     alpha3  = x[11*i+2];
2047     alpha4  = x[11*i+3];
2048     alpha5  = x[11*i+4];
2049     alpha6  = x[11*i+5];
2050     alpha7  = x[11*i+6];
2051     alpha8  = x[11*i+7];
2052     alpha9  = x[11*i+8];
2053     alpha10 = x[11*i+9];
2054     alpha11 = x[11*i+10];
2055     while (n-->0) {
2056       y[11*(*idx)]    += alpha1*(*v);
2057       y[11*(*idx)+1]  += alpha2*(*v);
2058       y[11*(*idx)+2]  += alpha3*(*v);
2059       y[11*(*idx)+3]  += alpha4*(*v);
2060       y[11*(*idx)+4]  += alpha5*(*v);
2061       y[11*(*idx)+5]  += alpha6*(*v);
2062       y[11*(*idx)+6]  += alpha7*(*v);
2063       y[11*(*idx)+7]  += alpha8*(*v);
2064       y[11*(*idx)+8]  += alpha9*(*v);
2065       y[11*(*idx)+9]  += alpha10*(*v);
2066       y[11*(*idx)+10] += alpha11*(*v);
2067       idx++; v++;
2068     }
2069   }
2070   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2071   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2072   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 
2077 /*--------------------------------------------------------------------------------------------*/
2078 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2079 {
2080   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2081   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2082   const PetscScalar *x,*v;
2083   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2084   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2085   PetscErrorCode    ierr;
2086   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2087   PetscInt          nonzerorow=0,n,i,jrow,j;
2088 
2089   PetscFunctionBegin;
2090   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2091   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2092   idx  = a->j;
2093   v    = a->a;
2094   ii   = a->i;
2095 
2096   for (i=0; i<m; i++) {
2097     jrow  = ii[i];
2098     n     = ii[i+1] - jrow;
2099     sum1  = 0.0;
2100     sum2  = 0.0;
2101     sum3  = 0.0;
2102     sum4  = 0.0;
2103     sum5  = 0.0;
2104     sum6  = 0.0;
2105     sum7  = 0.0;
2106     sum8  = 0.0;
2107     sum9  = 0.0;
2108     sum10 = 0.0;
2109     sum11 = 0.0;
2110     sum12 = 0.0;
2111     sum13 = 0.0;
2112     sum14 = 0.0;
2113     sum15 = 0.0;
2114     sum16 = 0.0;
2115 
2116     nonzerorow += (n>0);
2117     for (j=0; j<n; j++) {
2118       sum1  += v[jrow]*x[16*idx[jrow]];
2119       sum2  += v[jrow]*x[16*idx[jrow]+1];
2120       sum3  += v[jrow]*x[16*idx[jrow]+2];
2121       sum4  += v[jrow]*x[16*idx[jrow]+3];
2122       sum5  += v[jrow]*x[16*idx[jrow]+4];
2123       sum6  += v[jrow]*x[16*idx[jrow]+5];
2124       sum7  += v[jrow]*x[16*idx[jrow]+6];
2125       sum8  += v[jrow]*x[16*idx[jrow]+7];
2126       sum9  += v[jrow]*x[16*idx[jrow]+8];
2127       sum10 += v[jrow]*x[16*idx[jrow]+9];
2128       sum11 += v[jrow]*x[16*idx[jrow]+10];
2129       sum12 += v[jrow]*x[16*idx[jrow]+11];
2130       sum13 += v[jrow]*x[16*idx[jrow]+12];
2131       sum14 += v[jrow]*x[16*idx[jrow]+13];
2132       sum15 += v[jrow]*x[16*idx[jrow]+14];
2133       sum16 += v[jrow]*x[16*idx[jrow]+15];
2134       jrow++;
2135     }
2136     y[16*i]    = sum1;
2137     y[16*i+1]  = sum2;
2138     y[16*i+2]  = sum3;
2139     y[16*i+3]  = sum4;
2140     y[16*i+4]  = sum5;
2141     y[16*i+5]  = sum6;
2142     y[16*i+6]  = sum7;
2143     y[16*i+7]  = sum8;
2144     y[16*i+8]  = sum9;
2145     y[16*i+9]  = sum10;
2146     y[16*i+10] = sum11;
2147     y[16*i+11] = sum12;
2148     y[16*i+12] = sum13;
2149     y[16*i+13] = sum14;
2150     y[16*i+14] = sum15;
2151     y[16*i+15] = sum16;
2152   }
2153 
2154   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2155   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2156   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2161 {
2162   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2163   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2164   const PetscScalar *x,*v;
2165   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2166   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2167   PetscErrorCode    ierr;
2168   const PetscInt    m = b->AIJ->rmap->n,*idx;
2169   PetscInt          n,i;
2170 
2171   PetscFunctionBegin;
2172   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2173   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2174   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2175 
2176   for (i=0; i<m; i++) {
2177     idx     = a->j + a->i[i];
2178     v       = a->a + a->i[i];
2179     n       = a->i[i+1] - a->i[i];
2180     alpha1  = x[16*i];
2181     alpha2  = x[16*i+1];
2182     alpha3  = x[16*i+2];
2183     alpha4  = x[16*i+3];
2184     alpha5  = x[16*i+4];
2185     alpha6  = x[16*i+5];
2186     alpha7  = x[16*i+6];
2187     alpha8  = x[16*i+7];
2188     alpha9  = x[16*i+8];
2189     alpha10 = x[16*i+9];
2190     alpha11 = x[16*i+10];
2191     alpha12 = x[16*i+11];
2192     alpha13 = x[16*i+12];
2193     alpha14 = x[16*i+13];
2194     alpha15 = x[16*i+14];
2195     alpha16 = x[16*i+15];
2196     while (n-->0) {
2197       y[16*(*idx)]    += alpha1*(*v);
2198       y[16*(*idx)+1]  += alpha2*(*v);
2199       y[16*(*idx)+2]  += alpha3*(*v);
2200       y[16*(*idx)+3]  += alpha4*(*v);
2201       y[16*(*idx)+4]  += alpha5*(*v);
2202       y[16*(*idx)+5]  += alpha6*(*v);
2203       y[16*(*idx)+6]  += alpha7*(*v);
2204       y[16*(*idx)+7]  += alpha8*(*v);
2205       y[16*(*idx)+8]  += alpha9*(*v);
2206       y[16*(*idx)+9]  += alpha10*(*v);
2207       y[16*(*idx)+10] += alpha11*(*v);
2208       y[16*(*idx)+11] += alpha12*(*v);
2209       y[16*(*idx)+12] += alpha13*(*v);
2210       y[16*(*idx)+13] += alpha14*(*v);
2211       y[16*(*idx)+14] += alpha15*(*v);
2212       y[16*(*idx)+15] += alpha16*(*v);
2213       idx++; v++;
2214     }
2215   }
2216   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2217   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2218   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2223 {
2224   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2225   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2226   const PetscScalar *x,*v;
2227   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2228   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2229   PetscErrorCode    ierr;
2230   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2231   PetscInt          n,i,jrow,j;
2232 
2233   PetscFunctionBegin;
2234   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2235   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2236   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2237   idx  = a->j;
2238   v    = a->a;
2239   ii   = a->i;
2240 
2241   for (i=0; i<m; i++) {
2242     jrow  = ii[i];
2243     n     = ii[i+1] - jrow;
2244     sum1  = 0.0;
2245     sum2  = 0.0;
2246     sum3  = 0.0;
2247     sum4  = 0.0;
2248     sum5  = 0.0;
2249     sum6  = 0.0;
2250     sum7  = 0.0;
2251     sum8  = 0.0;
2252     sum9  = 0.0;
2253     sum10 = 0.0;
2254     sum11 = 0.0;
2255     sum12 = 0.0;
2256     sum13 = 0.0;
2257     sum14 = 0.0;
2258     sum15 = 0.0;
2259     sum16 = 0.0;
2260     for (j=0; j<n; j++) {
2261       sum1  += v[jrow]*x[16*idx[jrow]];
2262       sum2  += v[jrow]*x[16*idx[jrow]+1];
2263       sum3  += v[jrow]*x[16*idx[jrow]+2];
2264       sum4  += v[jrow]*x[16*idx[jrow]+3];
2265       sum5  += v[jrow]*x[16*idx[jrow]+4];
2266       sum6  += v[jrow]*x[16*idx[jrow]+5];
2267       sum7  += v[jrow]*x[16*idx[jrow]+6];
2268       sum8  += v[jrow]*x[16*idx[jrow]+7];
2269       sum9  += v[jrow]*x[16*idx[jrow]+8];
2270       sum10 += v[jrow]*x[16*idx[jrow]+9];
2271       sum11 += v[jrow]*x[16*idx[jrow]+10];
2272       sum12 += v[jrow]*x[16*idx[jrow]+11];
2273       sum13 += v[jrow]*x[16*idx[jrow]+12];
2274       sum14 += v[jrow]*x[16*idx[jrow]+13];
2275       sum15 += v[jrow]*x[16*idx[jrow]+14];
2276       sum16 += v[jrow]*x[16*idx[jrow]+15];
2277       jrow++;
2278     }
2279     y[16*i]    += sum1;
2280     y[16*i+1]  += sum2;
2281     y[16*i+2]  += sum3;
2282     y[16*i+3]  += sum4;
2283     y[16*i+4]  += sum5;
2284     y[16*i+5]  += sum6;
2285     y[16*i+6]  += sum7;
2286     y[16*i+7]  += sum8;
2287     y[16*i+8]  += sum9;
2288     y[16*i+9]  += sum10;
2289     y[16*i+10] += sum11;
2290     y[16*i+11] += sum12;
2291     y[16*i+12] += sum13;
2292     y[16*i+13] += sum14;
2293     y[16*i+14] += sum15;
2294     y[16*i+15] += sum16;
2295   }
2296 
2297   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2298   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2299   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304 {
2305   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2306   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307   const PetscScalar *x,*v;
2308   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2309   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2310   PetscErrorCode    ierr;
2311   const PetscInt    m = b->AIJ->rmap->n,*idx;
2312   PetscInt          n,i;
2313 
2314   PetscFunctionBegin;
2315   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2316   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2317   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2318   for (i=0; i<m; i++) {
2319     idx     = a->j + a->i[i];
2320     v       = a->a + a->i[i];
2321     n       = a->i[i+1] - a->i[i];
2322     alpha1  = x[16*i];
2323     alpha2  = x[16*i+1];
2324     alpha3  = x[16*i+2];
2325     alpha4  = x[16*i+3];
2326     alpha5  = x[16*i+4];
2327     alpha6  = x[16*i+5];
2328     alpha7  = x[16*i+6];
2329     alpha8  = x[16*i+7];
2330     alpha9  = x[16*i+8];
2331     alpha10 = x[16*i+9];
2332     alpha11 = x[16*i+10];
2333     alpha12 = x[16*i+11];
2334     alpha13 = x[16*i+12];
2335     alpha14 = x[16*i+13];
2336     alpha15 = x[16*i+14];
2337     alpha16 = x[16*i+15];
2338     while (n-->0) {
2339       y[16*(*idx)]    += alpha1*(*v);
2340       y[16*(*idx)+1]  += alpha2*(*v);
2341       y[16*(*idx)+2]  += alpha3*(*v);
2342       y[16*(*idx)+3]  += alpha4*(*v);
2343       y[16*(*idx)+4]  += alpha5*(*v);
2344       y[16*(*idx)+5]  += alpha6*(*v);
2345       y[16*(*idx)+6]  += alpha7*(*v);
2346       y[16*(*idx)+7]  += alpha8*(*v);
2347       y[16*(*idx)+8]  += alpha9*(*v);
2348       y[16*(*idx)+9]  += alpha10*(*v);
2349       y[16*(*idx)+10] += alpha11*(*v);
2350       y[16*(*idx)+11] += alpha12*(*v);
2351       y[16*(*idx)+12] += alpha13*(*v);
2352       y[16*(*idx)+13] += alpha14*(*v);
2353       y[16*(*idx)+14] += alpha15*(*v);
2354       y[16*(*idx)+15] += alpha16*(*v);
2355       idx++; v++;
2356     }
2357   }
2358   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2359   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2360   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 /*--------------------------------------------------------------------------------------------*/
2365 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2366 {
2367   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2368   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2369   const PetscScalar *x,*v;
2370   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2371   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2372   PetscErrorCode    ierr;
2373   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2374   PetscInt          nonzerorow=0,n,i,jrow,j;
2375 
2376   PetscFunctionBegin;
2377   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2378   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2379   idx  = a->j;
2380   v    = a->a;
2381   ii   = a->i;
2382 
2383   for (i=0; i<m; i++) {
2384     jrow  = ii[i];
2385     n     = ii[i+1] - jrow;
2386     sum1  = 0.0;
2387     sum2  = 0.0;
2388     sum3  = 0.0;
2389     sum4  = 0.0;
2390     sum5  = 0.0;
2391     sum6  = 0.0;
2392     sum7  = 0.0;
2393     sum8  = 0.0;
2394     sum9  = 0.0;
2395     sum10 = 0.0;
2396     sum11 = 0.0;
2397     sum12 = 0.0;
2398     sum13 = 0.0;
2399     sum14 = 0.0;
2400     sum15 = 0.0;
2401     sum16 = 0.0;
2402     sum17 = 0.0;
2403     sum18 = 0.0;
2404 
2405     nonzerorow += (n>0);
2406     for (j=0; j<n; j++) {
2407       sum1  += v[jrow]*x[18*idx[jrow]];
2408       sum2  += v[jrow]*x[18*idx[jrow]+1];
2409       sum3  += v[jrow]*x[18*idx[jrow]+2];
2410       sum4  += v[jrow]*x[18*idx[jrow]+3];
2411       sum5  += v[jrow]*x[18*idx[jrow]+4];
2412       sum6  += v[jrow]*x[18*idx[jrow]+5];
2413       sum7  += v[jrow]*x[18*idx[jrow]+6];
2414       sum8  += v[jrow]*x[18*idx[jrow]+7];
2415       sum9  += v[jrow]*x[18*idx[jrow]+8];
2416       sum10 += v[jrow]*x[18*idx[jrow]+9];
2417       sum11 += v[jrow]*x[18*idx[jrow]+10];
2418       sum12 += v[jrow]*x[18*idx[jrow]+11];
2419       sum13 += v[jrow]*x[18*idx[jrow]+12];
2420       sum14 += v[jrow]*x[18*idx[jrow]+13];
2421       sum15 += v[jrow]*x[18*idx[jrow]+14];
2422       sum16 += v[jrow]*x[18*idx[jrow]+15];
2423       sum17 += v[jrow]*x[18*idx[jrow]+16];
2424       sum18 += v[jrow]*x[18*idx[jrow]+17];
2425       jrow++;
2426     }
2427     y[18*i]    = sum1;
2428     y[18*i+1]  = sum2;
2429     y[18*i+2]  = sum3;
2430     y[18*i+3]  = sum4;
2431     y[18*i+4]  = sum5;
2432     y[18*i+5]  = sum6;
2433     y[18*i+6]  = sum7;
2434     y[18*i+7]  = sum8;
2435     y[18*i+8]  = sum9;
2436     y[18*i+9]  = sum10;
2437     y[18*i+10] = sum11;
2438     y[18*i+11] = sum12;
2439     y[18*i+12] = sum13;
2440     y[18*i+13] = sum14;
2441     y[18*i+14] = sum15;
2442     y[18*i+15] = sum16;
2443     y[18*i+16] = sum17;
2444     y[18*i+17] = sum18;
2445   }
2446 
2447   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2448   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2454 {
2455   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2456   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2457   const PetscScalar *x,*v;
2458   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2459   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2460   PetscErrorCode    ierr;
2461   const PetscInt    m = b->AIJ->rmap->n,*idx;
2462   PetscInt          n,i;
2463 
2464   PetscFunctionBegin;
2465   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2466   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2467   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2468 
2469   for (i=0; i<m; i++) {
2470     idx     = a->j + a->i[i];
2471     v       = a->a + a->i[i];
2472     n       = a->i[i+1] - a->i[i];
2473     alpha1  = x[18*i];
2474     alpha2  = x[18*i+1];
2475     alpha3  = x[18*i+2];
2476     alpha4  = x[18*i+3];
2477     alpha5  = x[18*i+4];
2478     alpha6  = x[18*i+5];
2479     alpha7  = x[18*i+6];
2480     alpha8  = x[18*i+7];
2481     alpha9  = x[18*i+8];
2482     alpha10 = x[18*i+9];
2483     alpha11 = x[18*i+10];
2484     alpha12 = x[18*i+11];
2485     alpha13 = x[18*i+12];
2486     alpha14 = x[18*i+13];
2487     alpha15 = x[18*i+14];
2488     alpha16 = x[18*i+15];
2489     alpha17 = x[18*i+16];
2490     alpha18 = x[18*i+17];
2491     while (n-->0) {
2492       y[18*(*idx)]    += alpha1*(*v);
2493       y[18*(*idx)+1]  += alpha2*(*v);
2494       y[18*(*idx)+2]  += alpha3*(*v);
2495       y[18*(*idx)+3]  += alpha4*(*v);
2496       y[18*(*idx)+4]  += alpha5*(*v);
2497       y[18*(*idx)+5]  += alpha6*(*v);
2498       y[18*(*idx)+6]  += alpha7*(*v);
2499       y[18*(*idx)+7]  += alpha8*(*v);
2500       y[18*(*idx)+8]  += alpha9*(*v);
2501       y[18*(*idx)+9]  += alpha10*(*v);
2502       y[18*(*idx)+10] += alpha11*(*v);
2503       y[18*(*idx)+11] += alpha12*(*v);
2504       y[18*(*idx)+12] += alpha13*(*v);
2505       y[18*(*idx)+13] += alpha14*(*v);
2506       y[18*(*idx)+14] += alpha15*(*v);
2507       y[18*(*idx)+15] += alpha16*(*v);
2508       y[18*(*idx)+16] += alpha17*(*v);
2509       y[18*(*idx)+17] += alpha18*(*v);
2510       idx++; v++;
2511     }
2512   }
2513   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2514   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2515   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2520 {
2521   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2522   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2523   const PetscScalar *x,*v;
2524   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2525   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2526   PetscErrorCode    ierr;
2527   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2528   PetscInt          n,i,jrow,j;
2529 
2530   PetscFunctionBegin;
2531   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2532   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2533   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2534   idx  = a->j;
2535   v    = a->a;
2536   ii   = a->i;
2537 
2538   for (i=0; i<m; i++) {
2539     jrow  = ii[i];
2540     n     = ii[i+1] - jrow;
2541     sum1  = 0.0;
2542     sum2  = 0.0;
2543     sum3  = 0.0;
2544     sum4  = 0.0;
2545     sum5  = 0.0;
2546     sum6  = 0.0;
2547     sum7  = 0.0;
2548     sum8  = 0.0;
2549     sum9  = 0.0;
2550     sum10 = 0.0;
2551     sum11 = 0.0;
2552     sum12 = 0.0;
2553     sum13 = 0.0;
2554     sum14 = 0.0;
2555     sum15 = 0.0;
2556     sum16 = 0.0;
2557     sum17 = 0.0;
2558     sum18 = 0.0;
2559     for (j=0; j<n; j++) {
2560       sum1  += v[jrow]*x[18*idx[jrow]];
2561       sum2  += v[jrow]*x[18*idx[jrow]+1];
2562       sum3  += v[jrow]*x[18*idx[jrow]+2];
2563       sum4  += v[jrow]*x[18*idx[jrow]+3];
2564       sum5  += v[jrow]*x[18*idx[jrow]+4];
2565       sum6  += v[jrow]*x[18*idx[jrow]+5];
2566       sum7  += v[jrow]*x[18*idx[jrow]+6];
2567       sum8  += v[jrow]*x[18*idx[jrow]+7];
2568       sum9  += v[jrow]*x[18*idx[jrow]+8];
2569       sum10 += v[jrow]*x[18*idx[jrow]+9];
2570       sum11 += v[jrow]*x[18*idx[jrow]+10];
2571       sum12 += v[jrow]*x[18*idx[jrow]+11];
2572       sum13 += v[jrow]*x[18*idx[jrow]+12];
2573       sum14 += v[jrow]*x[18*idx[jrow]+13];
2574       sum15 += v[jrow]*x[18*idx[jrow]+14];
2575       sum16 += v[jrow]*x[18*idx[jrow]+15];
2576       sum17 += v[jrow]*x[18*idx[jrow]+16];
2577       sum18 += v[jrow]*x[18*idx[jrow]+17];
2578       jrow++;
2579     }
2580     y[18*i]    += sum1;
2581     y[18*i+1]  += sum2;
2582     y[18*i+2]  += sum3;
2583     y[18*i+3]  += sum4;
2584     y[18*i+4]  += sum5;
2585     y[18*i+5]  += sum6;
2586     y[18*i+6]  += sum7;
2587     y[18*i+7]  += sum8;
2588     y[18*i+8]  += sum9;
2589     y[18*i+9]  += sum10;
2590     y[18*i+10] += sum11;
2591     y[18*i+11] += sum12;
2592     y[18*i+12] += sum13;
2593     y[18*i+13] += sum14;
2594     y[18*i+14] += sum15;
2595     y[18*i+15] += sum16;
2596     y[18*i+16] += sum17;
2597     y[18*i+17] += sum18;
2598   }
2599 
2600   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2601   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2602   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607 {
2608   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2609   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2610   const PetscScalar *x,*v;
2611   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2612   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2613   PetscErrorCode    ierr;
2614   const PetscInt    m = b->AIJ->rmap->n,*idx;
2615   PetscInt          n,i;
2616 
2617   PetscFunctionBegin;
2618   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2619   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2620   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2621   for (i=0; i<m; i++) {
2622     idx     = a->j + a->i[i];
2623     v       = a->a + a->i[i];
2624     n       = a->i[i+1] - a->i[i];
2625     alpha1  = x[18*i];
2626     alpha2  = x[18*i+1];
2627     alpha3  = x[18*i+2];
2628     alpha4  = x[18*i+3];
2629     alpha5  = x[18*i+4];
2630     alpha6  = x[18*i+5];
2631     alpha7  = x[18*i+6];
2632     alpha8  = x[18*i+7];
2633     alpha9  = x[18*i+8];
2634     alpha10 = x[18*i+9];
2635     alpha11 = x[18*i+10];
2636     alpha12 = x[18*i+11];
2637     alpha13 = x[18*i+12];
2638     alpha14 = x[18*i+13];
2639     alpha15 = x[18*i+14];
2640     alpha16 = x[18*i+15];
2641     alpha17 = x[18*i+16];
2642     alpha18 = x[18*i+17];
2643     while (n-->0) {
2644       y[18*(*idx)]    += alpha1*(*v);
2645       y[18*(*idx)+1]  += alpha2*(*v);
2646       y[18*(*idx)+2]  += alpha3*(*v);
2647       y[18*(*idx)+3]  += alpha4*(*v);
2648       y[18*(*idx)+4]  += alpha5*(*v);
2649       y[18*(*idx)+5]  += alpha6*(*v);
2650       y[18*(*idx)+6]  += alpha7*(*v);
2651       y[18*(*idx)+7]  += alpha8*(*v);
2652       y[18*(*idx)+8]  += alpha9*(*v);
2653       y[18*(*idx)+9]  += alpha10*(*v);
2654       y[18*(*idx)+10] += alpha11*(*v);
2655       y[18*(*idx)+11] += alpha12*(*v);
2656       y[18*(*idx)+12] += alpha13*(*v);
2657       y[18*(*idx)+13] += alpha14*(*v);
2658       y[18*(*idx)+14] += alpha15*(*v);
2659       y[18*(*idx)+15] += alpha16*(*v);
2660       y[18*(*idx)+16] += alpha17*(*v);
2661       y[18*(*idx)+17] += alpha18*(*v);
2662       idx++; v++;
2663     }
2664   }
2665   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2666   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2667   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2672 {
2673   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2674   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2675   const PetscScalar *x,*v;
2676   PetscScalar       *y,*sums;
2677   PetscErrorCode    ierr;
2678   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2679   PetscInt          n,i,jrow,j,dof = b->dof,k;
2680 
2681   PetscFunctionBegin;
2682   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2683   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2684   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2685   idx  = a->j;
2686   v    = a->a;
2687   ii   = a->i;
2688 
2689   for (i=0; i<m; i++) {
2690     jrow = ii[i];
2691     n    = ii[i+1] - jrow;
2692     sums = y + dof*i;
2693     for (j=0; j<n; j++) {
2694       for (k=0; k<dof; k++) {
2695         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2696       }
2697       jrow++;
2698     }
2699   }
2700 
2701   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2702   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2703   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2708 {
2709   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2710   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2711   const PetscScalar *x,*v;
2712   PetscScalar       *y,*sums;
2713   PetscErrorCode    ierr;
2714   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2715   PetscInt          n,i,jrow,j,dof = b->dof,k;
2716 
2717   PetscFunctionBegin;
2718   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2719   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2720   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2721   idx  = a->j;
2722   v    = a->a;
2723   ii   = a->i;
2724 
2725   for (i=0; i<m; i++) {
2726     jrow = ii[i];
2727     n    = ii[i+1] - jrow;
2728     sums = y + dof*i;
2729     for (j=0; j<n; j++) {
2730       for (k=0; k<dof; k++) {
2731         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2732       }
2733       jrow++;
2734     }
2735   }
2736 
2737   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2738   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2739   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2744 {
2745   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2746   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2747   const PetscScalar *x,*v,*alpha;
2748   PetscScalar       *y;
2749   PetscErrorCode    ierr;
2750   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2751   PetscInt          n,i,k;
2752 
2753   PetscFunctionBegin;
2754   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2755   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2756   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2757   for (i=0; i<m; i++) {
2758     idx   = a->j + a->i[i];
2759     v     = a->a + a->i[i];
2760     n     = a->i[i+1] - a->i[i];
2761     alpha = x + dof*i;
2762     while (n-->0) {
2763       for (k=0; k<dof; k++) {
2764         y[dof*(*idx)+k] += alpha[k]*(*v);
2765       }
2766       idx++; v++;
2767     }
2768   }
2769   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2770   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2771   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2776 {
2777   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2778   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2779   const PetscScalar *x,*v,*alpha;
2780   PetscScalar       *y;
2781   PetscErrorCode    ierr;
2782   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2783   PetscInt          n,i,k;
2784 
2785   PetscFunctionBegin;
2786   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2787   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2788   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2789   for (i=0; i<m; i++) {
2790     idx   = a->j + a->i[i];
2791     v     = a->a + a->i[i];
2792     n     = a->i[i+1] - a->i[i];
2793     alpha = x + dof*i;
2794     while (n-->0) {
2795       for (k=0; k<dof; k++) {
2796         y[dof*(*idx)+k] += alpha[k]*(*v);
2797       }
2798       idx++; v++;
2799     }
2800   }
2801   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2802   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2803   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 /*===================================================================================*/
2808 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2809 {
2810   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   /* start the scatter */
2815   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2816   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2817   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2818   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2823 {
2824   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2829   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2830   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2831   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2836 {
2837   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2838   PetscErrorCode ierr;
2839 
2840   PetscFunctionBegin;
2841   /* start the scatter */
2842   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2843   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2844   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2850 {
2851   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2852   PetscErrorCode ierr;
2853 
2854   PetscFunctionBegin;
2855   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2856   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2857   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2858   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 /* ----------------------------------------------------------------*/
2863 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2864 {
2865   Mat_Product    *product = C->product;
2866 
2867   PetscFunctionBegin;
2868   if (product->type == MATPRODUCT_PtAP) {
2869     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2870   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2875 {
2876   PetscErrorCode ierr;
2877   Mat_Product    *product = C->product;
2878   PetscBool      flg = PETSC_FALSE;
2879   Mat            A=product->A,P=product->B;
2880   PetscInt       alg=1; /* set default algorithm */
2881 #if !defined(PETSC_HAVE_HYPRE)
2882   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2883   PetscInt       nalg=4;
2884 #else
2885   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2886   PetscInt       nalg=5;
2887 #endif
2888 
2889   PetscFunctionBegin;
2890   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2891 
2892   /* PtAP */
2893   /* Check matrix local sizes */
2894   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2895   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2896 
2897   /* Set the default algorithm */
2898   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
2899   if (flg) {
2900     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2901   }
2902 
2903   /* Get runtime option */
2904   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
2905   ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2906   if (flg) {
2907     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2908   }
2909   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2910 
2911   ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr);
2912   if (flg) {
2913     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2914     PetscFunctionReturn(0);
2915   }
2916 
2917   ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
2918   if (flg) {
2919     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2920     PetscFunctionReturn(0);
2921   }
2922 
2923   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2924   ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr);
2925   ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr);
2926   ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
2927   PetscFunctionReturn(0);
2928 }
2929 
2930 /* ----------------------------------------------------------------*/
2931 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2932 {
2933   PetscErrorCode     ierr;
2934   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2935   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2936   Mat                P         =pp->AIJ;
2937   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2938   PetscInt           *pti,*ptj,*ptJ;
2939   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2940   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2941   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2942   MatScalar          *ca;
2943   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2944 
2945   PetscFunctionBegin;
2946   /* Get ij structure of P^T */
2947   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2948 
2949   cn = pn*ppdof;
2950   /* Allocate ci array, arrays for fill computation and */
2951   /* free space for accumulating nonzero column info */
2952   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2953   ci[0] = 0;
2954 
2955   /* Work arrays for rows of P^T*A */
2956   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2957   ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr);
2958   ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr);
2959 
2960   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2961   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2962   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2963   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2964   current_space = free_space;
2965 
2966   /* Determine symbolic info for each row of C: */
2967   for (i=0; i<pn; i++) {
2968     ptnzi = pti[i+1] - pti[i];
2969     ptJ   = ptj + pti[i];
2970     for (dof=0; dof<ppdof; dof++) {
2971       ptanzi = 0;
2972       /* Determine symbolic row of PtA: */
2973       for (j=0; j<ptnzi; j++) {
2974         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2975         arow = ptJ[j]*ppdof + dof;
2976         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2977         anzj = ai[arow+1] - ai[arow];
2978         ajj  = aj + ai[arow];
2979         for (k=0; k<anzj; k++) {
2980           if (!ptadenserow[ajj[k]]) {
2981             ptadenserow[ajj[k]]    = -1;
2982             ptasparserow[ptanzi++] = ajj[k];
2983           }
2984         }
2985       }
2986       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2987       ptaj = ptasparserow;
2988       cnzi = 0;
2989       for (j=0; j<ptanzi; j++) {
2990         /* Get offset within block of P */
2991         pshift = *ptaj%ppdof;
2992         /* Get block row of P */
2993         prow = (*ptaj++)/ppdof; /* integer division */
2994         /* P has same number of nonzeros per row as the compressed form */
2995         pnzj = pi[prow+1] - pi[prow];
2996         pjj  = pj + pi[prow];
2997         for (k=0;k<pnzj;k++) {
2998           /* Locations in C are shifted by the offset within the block */
2999           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3000           if (!denserow[pjj[k]*ppdof+pshift]) {
3001             denserow[pjj[k]*ppdof+pshift] = -1;
3002             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3003           }
3004         }
3005       }
3006 
3007       /* sort sparserow */
3008       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3009 
3010       /* If free space is not available, make more free space */
3011       /* Double the amount of total space in the list */
3012       if (current_space->local_remaining<cnzi) {
3013         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
3014       }
3015 
3016       /* Copy data into free space, and zero out denserows */
3017       ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr);
3018 
3019       current_space->array           += cnzi;
3020       current_space->local_used      += cnzi;
3021       current_space->local_remaining -= cnzi;
3022 
3023       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3024       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3025 
3026       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3027       /*        For now, we will recompute what is needed. */
3028       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3029     }
3030   }
3031   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3032   /* Allocate space for cj, initialize cj, and */
3033   /* destroy list of free space and other temporary array(s) */
3034   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3035   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3036   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3037 
3038   /* Allocate space for ca */
3039   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
3040 
3041   /* put together the new matrix */
3042   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr);
3043   ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr);
3044 
3045   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3046   /* Since these are PETSc arrays, change flags to free them as necessary. */
3047   c          = (Mat_SeqAIJ*)(C->data);
3048   c->free_a  = PETSC_TRUE;
3049   c->free_ij = PETSC_TRUE;
3050   c->nonew   = 0;
3051 
3052   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3053   C->ops->productnumeric = MatProductNumeric_PtAP;
3054 
3055   /* Clean up. */
3056   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3061 {
3062   /* This routine requires testing -- first draft only */
3063   PetscErrorCode  ierr;
3064   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3065   Mat             P  =pp->AIJ;
3066   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3067   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3068   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3069   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3070   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3071   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3072   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3073   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3074   MatScalar       *ca=c->a,*caj,*apa;
3075 
3076   PetscFunctionBegin;
3077   /* Allocate temporary array for storage of one row of A*P */
3078   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3079 
3080   /* Clear old values in C */
3081   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
3082 
3083   for (i=0; i<am; i++) {
3084     /* Form sparse row of A*P */
3085     anzi  = ai[i+1] - ai[i];
3086     apnzj = 0;
3087     for (j=0; j<anzi; j++) {
3088       /* Get offset within block of P */
3089       pshift = *aj%ppdof;
3090       /* Get block row of P */
3091       prow = *aj++/ppdof;   /* integer division */
3092       pnzj = pi[prow+1] - pi[prow];
3093       pjj  = pj + pi[prow];
3094       paj  = pa + pi[prow];
3095       for (k=0; k<pnzj; k++) {
3096         poffset = pjj[k]*ppdof+pshift;
3097         if (!apjdense[poffset]) {
3098           apjdense[poffset] = -1;
3099           apj[apnzj++]      = poffset;
3100         }
3101         apa[poffset] += (*aa)*paj[k];
3102       }
3103       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3104       aa++;
3105     }
3106 
3107     /* Sort the j index array for quick sparse axpy. */
3108     /* Note: a array does not need sorting as it is in dense storage locations. */
3109     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3110 
3111     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3112     prow    = i/ppdof; /* integer division */
3113     pshift  = i%ppdof;
3114     poffset = pi[prow];
3115     pnzi    = pi[prow+1] - poffset;
3116     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3117     pJ = pj+poffset;
3118     pA = pa+poffset;
3119     for (j=0; j<pnzi; j++) {
3120       crow = (*pJ)*ppdof+pshift;
3121       cjj  = cj + ci[crow];
3122       caj  = ca + ci[crow];
3123       pJ++;
3124       /* Perform sparse axpy operation.  Note cjj includes apj. */
3125       for (k=0,nextap=0; nextap<apnzj; k++) {
3126         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3127       }
3128       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3129       pA++;
3130     }
3131 
3132     /* Zero the current row info for A*P */
3133     for (j=0; j<apnzj; j++) {
3134       apa[apj[j]]      = 0.;
3135       apjdense[apj[j]] = 0;
3136     }
3137   }
3138 
3139   /* Assemble the final matrix and clean up */
3140   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 
3146 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3147 {
3148   PetscErrorCode      ierr;
3149   Mat_Product         *product = C->product;
3150   Mat                 A=product->A,P=product->B;
3151 
3152   PetscFunctionBegin;
3153   ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr);
3154   PetscFunctionReturn(0);
3155 }
3156 
3157 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3158 {
3159   PetscFunctionBegin;
3160   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3165 {
3166   PetscFunctionBegin;
3167   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3172 
3173 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3174 {
3175   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3176   PetscErrorCode  ierr;
3177 
3178   PetscFunctionBegin;
3179 
3180   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3185 
3186 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3187 {
3188   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3189   PetscErrorCode  ierr;
3190 
3191   PetscFunctionBegin;
3192   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3193   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3194   PetscFunctionReturn(0);
3195 }
3196 
3197 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3198 
3199 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3200 {
3201   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3202   PetscErrorCode  ierr;
3203 
3204   PetscFunctionBegin;
3205 
3206   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3211 
3212 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3213 {
3214   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3215   PetscErrorCode  ierr;
3216 
3217   PetscFunctionBegin;
3218 
3219   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3220   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3225 {
3226   PetscErrorCode      ierr;
3227   Mat_Product         *product = C->product;
3228   Mat                 A=product->A,P=product->B;
3229   PetscBool           flg;
3230 
3231   PetscFunctionBegin;
3232   ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr);
3233   if (flg) {
3234     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr);
3235     C->ops->productnumeric = MatProductNumeric_PtAP;
3236     PetscFunctionReturn(0);
3237   }
3238 
3239   ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
3240   if (flg) {
3241     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr);
3242     C->ops->productnumeric = MatProductNumeric_PtAP;
3243     PetscFunctionReturn(0);
3244   }
3245 
3246   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3247   PetscFunctionReturn(0);
3248 }
3249 
3250 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3251 {
3252   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3253   Mat            a    = b->AIJ,B;
3254   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3255   PetscErrorCode ierr;
3256   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3257   PetscInt       *cols;
3258   PetscScalar    *vals;
3259 
3260   PetscFunctionBegin;
3261   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3262   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3263   for (i=0; i<m; i++) {
3264     nmax = PetscMax(nmax,aij->ilen[i]);
3265     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3266   }
3267   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3268   ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr);
3269   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3270   ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr);
3271   ierr = PetscFree(ilen);CHKERRQ(ierr);
3272   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3273   ii   = 0;
3274   for (i=0; i<m; i++) {
3275     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3276     for (j=0; j<dof; j++) {
3277       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3278       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3279       ii++;
3280     }
3281     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3282   }
3283   ierr = PetscFree(icols);CHKERRQ(ierr);
3284   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286 
3287   if (reuse == MAT_INPLACE_MATRIX) {
3288     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3289   } else {
3290     *newmat = B;
3291   }
3292   PetscFunctionReturn(0);
3293 }
3294 
3295 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3296 
3297 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3298 {
3299   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3300   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3301   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3302   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3303   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3304   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3305   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3306   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3307   PetscInt       rstart,cstart,*garray,ii,k;
3308   PetscErrorCode ierr;
3309   PetscScalar    *vals,*ovals;
3310 
3311   PetscFunctionBegin;
3312   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3313   for (i=0; i<A->rmap->n/dof; i++) {
3314     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3315     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3316     for (j=0; j<dof; j++) {
3317       dnz[dof*i+j] = AIJ->ilen[i];
3318       onz[dof*i+j] = OAIJ->ilen[i];
3319     }
3320   }
3321   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3322   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3323   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3324   ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
3325   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3326   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3327 
3328   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3329   rstart = dof*maij->A->rmap->rstart;
3330   cstart = dof*maij->A->cmap->rstart;
3331   garray = mpiaij->garray;
3332 
3333   ii = rstart;
3334   for (i=0; i<A->rmap->n/dof; i++) {
3335     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3336     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3337     for (j=0; j<dof; j++) {
3338       for (k=0; k<ncols; k++) {
3339         icols[k] = cstart + dof*cols[k]+j;
3340       }
3341       for (k=0; k<oncols; k++) {
3342         oicols[k] = dof*garray[ocols[k]]+j;
3343       }
3344       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3345       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3346       ii++;
3347     }
3348     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3349     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3350   }
3351   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3352 
3353   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3354   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3355 
3356   if (reuse == MAT_INPLACE_MATRIX) {
3357     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3358     ((PetscObject)A)->refct = 1;
3359 
3360     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3361 
3362     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3363   } else {
3364     *newmat = B;
3365   }
3366   PetscFunctionReturn(0);
3367 }
3368 
3369 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3370 {
3371   PetscErrorCode ierr;
3372   Mat            A;
3373 
3374   PetscFunctionBegin;
3375   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3376   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3377   ierr = MatDestroy(&A);CHKERRQ(ierr);
3378   PetscFunctionReturn(0);
3379 }
3380 
3381 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3382 {
3383   PetscErrorCode ierr;
3384   Mat            A;
3385 
3386   PetscFunctionBegin;
3387   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3388   ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr);
3389   ierr = MatDestroy(&A);CHKERRQ(ierr);
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 /* ---------------------------------------------------------------------------------- */
3394 /*@
3395   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3396   operations for multicomponent problems.  It interpolates each component the same
3397   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3398   and MATMPIAIJ for distributed matrices.
3399 
3400   Collective
3401 
3402   Input Parameters:
3403 + A - the AIJ matrix describing the action on blocks
3404 - dof - the block size (number of components per node)
3405 
3406   Output Parameter:
3407 . maij - the new MAIJ matrix
3408 
3409   Operations provided:
3410 + MatMult
3411 . MatMultTranspose
3412 . MatMultAdd
3413 . MatMultTransposeAdd
3414 - MatView
3415 
3416   Level: advanced
3417 
3418 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3419 @*/
3420 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3421 {
3422   PetscErrorCode ierr;
3423   PetscMPIInt    size;
3424   PetscInt       n;
3425   Mat            B;
3426 #if defined(PETSC_HAVE_CUDA)
3427   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3428   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3429 #endif
3430 
3431   PetscFunctionBegin;
3432   dof  = PetscAbs(dof);
3433   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3434 
3435   if (dof == 1) *maij = A;
3436   else {
3437     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3438     /* propagate vec type */
3439     ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr);
3440     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3441     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3442     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3443     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3444     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3445 
3446     B->assembled = PETSC_TRUE;
3447 
3448     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3449     if (size == 1) {
3450       Mat_SeqMAIJ *b;
3451 
3452       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3453 
3454       B->ops->setup   = NULL;
3455       B->ops->destroy = MatDestroy_SeqMAIJ;
3456       B->ops->view    = MatView_SeqMAIJ;
3457 
3458       b               = (Mat_SeqMAIJ*)B->data;
3459       b->dof          = dof;
3460       b->AIJ          = A;
3461 
3462       if (dof == 2) {
3463         B->ops->mult             = MatMult_SeqMAIJ_2;
3464         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3465         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3466         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3467       } else if (dof == 3) {
3468         B->ops->mult             = MatMult_SeqMAIJ_3;
3469         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3470         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3471         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3472       } else if (dof == 4) {
3473         B->ops->mult             = MatMult_SeqMAIJ_4;
3474         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3475         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3476         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3477       } else if (dof == 5) {
3478         B->ops->mult             = MatMult_SeqMAIJ_5;
3479         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3480         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3481         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3482       } else if (dof == 6) {
3483         B->ops->mult             = MatMult_SeqMAIJ_6;
3484         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3485         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3486         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3487       } else if (dof == 7) {
3488         B->ops->mult             = MatMult_SeqMAIJ_7;
3489         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3490         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3491         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3492       } else if (dof == 8) {
3493         B->ops->mult             = MatMult_SeqMAIJ_8;
3494         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3495         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3496         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3497       } else if (dof == 9) {
3498         B->ops->mult             = MatMult_SeqMAIJ_9;
3499         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3500         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3501         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3502       } else if (dof == 10) {
3503         B->ops->mult             = MatMult_SeqMAIJ_10;
3504         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3505         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3506         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3507       } else if (dof == 11) {
3508         B->ops->mult             = MatMult_SeqMAIJ_11;
3509         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3510         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3511         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3512       } else if (dof == 16) {
3513         B->ops->mult             = MatMult_SeqMAIJ_16;
3514         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3515         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3516         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3517       } else if (dof == 18) {
3518         B->ops->mult             = MatMult_SeqMAIJ_18;
3519         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3520         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3521         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3522       } else {
3523         B->ops->mult             = MatMult_SeqMAIJ_N;
3524         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3525         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3526         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3527       }
3528 #if defined(PETSC_HAVE_CUDA)
3529       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3530 #endif
3531       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3532       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3533     } else {
3534       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3535       Mat_MPIMAIJ *b;
3536       IS          from,to;
3537       Vec         gvec;
3538 
3539       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3540 
3541       B->ops->setup            = NULL;
3542       B->ops->destroy          = MatDestroy_MPIMAIJ;
3543       B->ops->view             = MatView_MPIMAIJ;
3544 
3545       b      = (Mat_MPIMAIJ*)B->data;
3546       b->dof = dof;
3547       b->A   = A;
3548 
3549       ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr);
3550       ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr);
3551 
3552       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3553       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3554       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3555       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3556       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3557 
3558       /* create two temporary Index sets for build scatter gather */
3559       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3560       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3561 
3562       /* create temporary global vector to generate scatter context */
3563       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3564 
3565       /* generate the scatter context */
3566       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3567 
3568       ierr = ISDestroy(&from);CHKERRQ(ierr);
3569       ierr = ISDestroy(&to);CHKERRQ(ierr);
3570       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3571 
3572       B->ops->mult             = MatMult_MPIMAIJ_dof;
3573       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3574       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3575       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3576 
3577 #if defined(PETSC_HAVE_CUDA)
3578       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3579 #endif
3580       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3581       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3582     }
3583     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3584     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3585     ierr  = MatSetFromOptions(B);CHKERRQ(ierr);
3586     ierr  = MatSetUp(B);CHKERRQ(ierr);
3587 #if defined(PETSC_HAVE_CUDA)
3588     /* temporary until we have CUDA implementation of MAIJ */
3589     {
3590       PetscBool flg;
3591       if (convert) {
3592         ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
3593         if (flg) {
3594           ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
3595         }
3596       }
3597     }
3598 #endif
3599     *maij = B;
3600     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3601   }
3602   PetscFunctionReturn(0);
3603 }
3604