xref: /petsc/src/mat/impls/maij/maij.c (revision 94ef8dde638caef1d0cd84a7dc8a2db65fcda8b6)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
36 
37 .seealso: MatCreateMAIJ()
38 @*/
39 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
40 {
41   PetscErrorCode ierr;
42   PetscBool      ismpimaij,isseqmaij;
43 
44   PetscFunctionBegin;
45   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
47   if (ismpimaij) {
48     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49 
50     *B = b->A;
51   } else if (isseqmaij) {
52     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53 
54     *B = b->AIJ;
55   } else {
56     *B = A;
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 /*@C
62    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63 
64    Logically Collective
65 
66    Input Parameter:
67 +  A - the MAIJ matrix
68 -  dof - the block size for the new matrix
69 
70    Output Parameter:
71 .  B - the new MAIJ matrix
72 
73    Level: advanced
74 
75 .seealso: MatCreateMAIJ()
76 @*/
77 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78 {
79   PetscErrorCode ierr;
80   Mat            Aij = NULL;
81 
82   PetscFunctionBegin;
83   PetscValidLogicalCollectiveInt(A,dof,2);
84   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
85   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
90 {
91   PetscErrorCode ierr;
92   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
93 
94   PetscFunctionBegin;
95   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
96   ierr = PetscFree(A->data);CHKERRQ(ierr);
97   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatSetUp_MAIJ(Mat A)
103 {
104   PetscFunctionBegin;
105   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
106   PetscFunctionReturn(0);
107 }
108 
109 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110 {
111   PetscErrorCode ierr;
112   Mat            B;
113 
114   PetscFunctionBegin;
115   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
116   ierr = MatView(B,viewer);CHKERRQ(ierr);
117   ierr = MatDestroy(&B);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
122 {
123   PetscErrorCode ierr;
124   Mat            B;
125 
126   PetscFunctionBegin;
127   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
128   ierr = MatView(B,viewer);CHKERRQ(ierr);
129   ierr = MatDestroy(&B);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
134 {
135   PetscErrorCode ierr;
136   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
140   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
141   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
142   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
143   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
144   ierr = PetscFree(A->data);CHKERRQ(ierr);
145   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
147   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 /*MC
152   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
153   multicomponent problems, interpolating or restricting each component the same way independently.
154   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
155 
156   Operations provided:
157 . MatMult
158 . MatMultTranspose
159 . MatMultAdd
160 . MatMultTransposeAdd
161 
162   Level: advanced
163 
164 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
165 M*/
166 
167 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
168 {
169   PetscErrorCode ierr;
170   Mat_MPIMAIJ    *b;
171   PetscMPIInt    size;
172 
173   PetscFunctionBegin;
174   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
175   A->data  = (void*)b;
176 
177   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
178 
179   A->ops->setup = MatSetUp_MAIJ;
180 
181   b->AIJ  = 0;
182   b->dof  = 0;
183   b->OAIJ = 0;
184   b->ctx  = 0;
185   b->w    = 0;
186   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
187   if (size == 1) {
188     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
189   } else {
190     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
191   }
192   A->preallocated  = PETSC_TRUE;
193   A->assembled     = PETSC_TRUE;
194   PetscFunctionReturn(0);
195 }
196 
197 /* --------------------------------------------------------------------------------------*/
198 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
199 {
200   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
201   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
202   const PetscScalar *x,*v;
203   PetscScalar       *y, sum1, sum2;
204   PetscErrorCode    ierr;
205   PetscInt          nonzerorow=0,n,i,jrow,j;
206   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
207 
208   PetscFunctionBegin;
209   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
210   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
211   idx  = a->j;
212   v    = a->a;
213   ii   = a->i;
214 
215   for (i=0; i<m; i++) {
216     jrow  = ii[i];
217     n     = ii[i+1] - jrow;
218     sum1  = 0.0;
219     sum2  = 0.0;
220 
221     nonzerorow += (n>0);
222     for (j=0; j<n; j++) {
223       sum1 += v[jrow]*x[2*idx[jrow]];
224       sum2 += v[jrow]*x[2*idx[jrow]+1];
225       jrow++;
226     }
227     y[2*i]   = sum1;
228     y[2*i+1] = sum2;
229   }
230 
231   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
232   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
233   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
238 {
239   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
240   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
241   const PetscScalar *x,*v;
242   PetscScalar       *y,alpha1,alpha2;
243   PetscErrorCode    ierr;
244   PetscInt          n,i;
245   const PetscInt    m = b->AIJ->rmap->n,*idx;
246 
247   PetscFunctionBegin;
248   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
249   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
250   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
251 
252   for (i=0; i<m; i++) {
253     idx    = a->j + a->i[i];
254     v      = a->a + a->i[i];
255     n      = a->i[i+1] - a->i[i];
256     alpha1 = x[2*i];
257     alpha2 = x[2*i+1];
258     while (n-->0) {
259       y[2*(*idx)]   += alpha1*(*v);
260       y[2*(*idx)+1] += alpha2*(*v);
261       idx++; v++;
262     }
263   }
264   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
265   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
266   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
271 {
272   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
273   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
274   const PetscScalar *x,*v;
275   PetscScalar       *y,sum1, sum2;
276   PetscErrorCode    ierr;
277   PetscInt          n,i,jrow,j;
278   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
279 
280   PetscFunctionBegin;
281   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
282   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
283   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
284   idx  = a->j;
285   v    = a->a;
286   ii   = a->i;
287 
288   for (i=0; i<m; i++) {
289     jrow = ii[i];
290     n    = ii[i+1] - jrow;
291     sum1 = 0.0;
292     sum2 = 0.0;
293     for (j=0; j<n; j++) {
294       sum1 += v[jrow]*x[2*idx[jrow]];
295       sum2 += v[jrow]*x[2*idx[jrow]+1];
296       jrow++;
297     }
298     y[2*i]   += sum1;
299     y[2*i+1] += sum2;
300   }
301 
302   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
303   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
308 {
309   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
310   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
311   const PetscScalar *x,*v;
312   PetscScalar       *y,alpha1,alpha2;
313   PetscErrorCode    ierr;
314   PetscInt          n,i;
315   const PetscInt    m = b->AIJ->rmap->n,*idx;
316 
317   PetscFunctionBegin;
318   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
319   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
320   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
321 
322   for (i=0; i<m; i++) {
323     idx    = a->j + a->i[i];
324     v      = a->a + a->i[i];
325     n      = a->i[i+1] - a->i[i];
326     alpha1 = x[2*i];
327     alpha2 = x[2*i+1];
328     while (n-->0) {
329       y[2*(*idx)]   += alpha1*(*v);
330       y[2*(*idx)+1] += alpha2*(*v);
331       idx++; v++;
332     }
333   }
334   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
335   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
336   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 /* --------------------------------------------------------------------------------------*/
340 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
341 {
342   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
343   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
344   const PetscScalar *x,*v;
345   PetscScalar       *y,sum1, sum2, sum3;
346   PetscErrorCode    ierr;
347   PetscInt          nonzerorow=0,n,i,jrow,j;
348   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
349 
350   PetscFunctionBegin;
351   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
353   idx  = a->j;
354   v    = a->a;
355   ii   = a->i;
356 
357   for (i=0; i<m; i++) {
358     jrow  = ii[i];
359     n     = ii[i+1] - jrow;
360     sum1  = 0.0;
361     sum2  = 0.0;
362     sum3  = 0.0;
363 
364     nonzerorow += (n>0);
365     for (j=0; j<n; j++) {
366       sum1 += v[jrow]*x[3*idx[jrow]];
367       sum2 += v[jrow]*x[3*idx[jrow]+1];
368       sum3 += v[jrow]*x[3*idx[jrow]+2];
369       jrow++;
370      }
371     y[3*i]   = sum1;
372     y[3*i+1] = sum2;
373     y[3*i+2] = sum3;
374   }
375 
376   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
377   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
378   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 
382 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
383 {
384   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
385   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
386   const PetscScalar *x,*v;
387   PetscScalar       *y,alpha1,alpha2,alpha3;
388   PetscErrorCode    ierr;
389   PetscInt          n,i;
390   const PetscInt    m = b->AIJ->rmap->n,*idx;
391 
392   PetscFunctionBegin;
393   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
394   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
395   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
396 
397   for (i=0; i<m; i++) {
398     idx    = a->j + a->i[i];
399     v      = a->a + a->i[i];
400     n      = a->i[i+1] - a->i[i];
401     alpha1 = x[3*i];
402     alpha2 = x[3*i+1];
403     alpha3 = x[3*i+2];
404     while (n-->0) {
405       y[3*(*idx)]   += alpha1*(*v);
406       y[3*(*idx)+1] += alpha2*(*v);
407       y[3*(*idx)+2] += alpha3*(*v);
408       idx++; v++;
409     }
410   }
411   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
412   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
413   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
418 {
419   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
420   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
421   const PetscScalar *x,*v;
422   PetscScalar       *y,sum1, sum2, sum3;
423   PetscErrorCode    ierr;
424   PetscInt          n,i,jrow,j;
425   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
426 
427   PetscFunctionBegin;
428   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
429   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
430   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
431   idx  = a->j;
432   v    = a->a;
433   ii   = a->i;
434 
435   for (i=0; i<m; i++) {
436     jrow = ii[i];
437     n    = ii[i+1] - jrow;
438     sum1 = 0.0;
439     sum2 = 0.0;
440     sum3 = 0.0;
441     for (j=0; j<n; j++) {
442       sum1 += v[jrow]*x[3*idx[jrow]];
443       sum2 += v[jrow]*x[3*idx[jrow]+1];
444       sum3 += v[jrow]*x[3*idx[jrow]+2];
445       jrow++;
446     }
447     y[3*i]   += sum1;
448     y[3*i+1] += sum2;
449     y[3*i+2] += sum3;
450   }
451 
452   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
453   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
454   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
458 {
459   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
460   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
461   const PetscScalar *x,*v;
462   PetscScalar       *y,alpha1,alpha2,alpha3;
463   PetscErrorCode    ierr;
464   PetscInt          n,i;
465   const PetscInt    m = b->AIJ->rmap->n,*idx;
466 
467   PetscFunctionBegin;
468   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
469   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
470   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
471   for (i=0; i<m; i++) {
472     idx    = a->j + a->i[i];
473     v      = a->a + a->i[i];
474     n      = a->i[i+1] - a->i[i];
475     alpha1 = x[3*i];
476     alpha2 = x[3*i+1];
477     alpha3 = x[3*i+2];
478     while (n-->0) {
479       y[3*(*idx)]   += alpha1*(*v);
480       y[3*(*idx)+1] += alpha2*(*v);
481       y[3*(*idx)+2] += alpha3*(*v);
482       idx++; v++;
483     }
484   }
485   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
486   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
487   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 /* ------------------------------------------------------------------------------*/
492 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
493 {
494   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
495   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
496   const PetscScalar *x,*v;
497   PetscScalar       *y,sum1, sum2, sum3, sum4;
498   PetscErrorCode    ierr;
499   PetscInt          nonzerorow=0,n,i,jrow,j;
500   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
501 
502   PetscFunctionBegin;
503   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
504   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
505   idx  = a->j;
506   v    = a->a;
507   ii   = a->i;
508 
509   for (i=0; i<m; i++) {
510     jrow        = ii[i];
511     n           = ii[i+1] - jrow;
512     sum1        = 0.0;
513     sum2        = 0.0;
514     sum3        = 0.0;
515     sum4        = 0.0;
516     nonzerorow += (n>0);
517     for (j=0; j<n; j++) {
518       sum1 += v[jrow]*x[4*idx[jrow]];
519       sum2 += v[jrow]*x[4*idx[jrow]+1];
520       sum3 += v[jrow]*x[4*idx[jrow]+2];
521       sum4 += v[jrow]*x[4*idx[jrow]+3];
522       jrow++;
523     }
524     y[4*i]   = sum1;
525     y[4*i+1] = sum2;
526     y[4*i+2] = sum3;
527     y[4*i+3] = sum4;
528   }
529 
530   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
531   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
532   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
537 {
538   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
539   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
540   const PetscScalar *x,*v;
541   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
542   PetscErrorCode    ierr;
543   PetscInt          n,i;
544   const PetscInt    m = b->AIJ->rmap->n,*idx;
545 
546   PetscFunctionBegin;
547   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
548   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
549   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
550   for (i=0; i<m; i++) {
551     idx    = a->j + a->i[i];
552     v      = a->a + a->i[i];
553     n      = a->i[i+1] - a->i[i];
554     alpha1 = x[4*i];
555     alpha2 = x[4*i+1];
556     alpha3 = x[4*i+2];
557     alpha4 = x[4*i+3];
558     while (n-->0) {
559       y[4*(*idx)]   += alpha1*(*v);
560       y[4*(*idx)+1] += alpha2*(*v);
561       y[4*(*idx)+2] += alpha3*(*v);
562       y[4*(*idx)+3] += alpha4*(*v);
563       idx++; v++;
564     }
565   }
566   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
567   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573 {
574   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
575   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
576   const PetscScalar *x,*v;
577   PetscScalar       *y,sum1, sum2, sum3, sum4;
578   PetscErrorCode    ierr;
579   PetscInt          n,i,jrow,j;
580   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
581 
582   PetscFunctionBegin;
583   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
584   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
585   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
586   idx  = a->j;
587   v    = a->a;
588   ii   = a->i;
589 
590   for (i=0; i<m; i++) {
591     jrow = ii[i];
592     n    = ii[i+1] - jrow;
593     sum1 = 0.0;
594     sum2 = 0.0;
595     sum3 = 0.0;
596     sum4 = 0.0;
597     for (j=0; j<n; j++) {
598       sum1 += v[jrow]*x[4*idx[jrow]];
599       sum2 += v[jrow]*x[4*idx[jrow]+1];
600       sum3 += v[jrow]*x[4*idx[jrow]+2];
601       sum4 += v[jrow]*x[4*idx[jrow]+3];
602       jrow++;
603     }
604     y[4*i]   += sum1;
605     y[4*i+1] += sum2;
606     y[4*i+2] += sum3;
607     y[4*i+3] += sum4;
608   }
609 
610   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
611   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
612   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
613   PetscFunctionReturn(0);
614 }
615 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
616 {
617   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
618   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
619   const PetscScalar *x,*v;
620   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
621   PetscErrorCode    ierr;
622   PetscInt          n,i;
623   const PetscInt    m = b->AIJ->rmap->n,*idx;
624 
625   PetscFunctionBegin;
626   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
627   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
628   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
629 
630   for (i=0; i<m; i++) {
631     idx    = a->j + a->i[i];
632     v      = a->a + a->i[i];
633     n      = a->i[i+1] - a->i[i];
634     alpha1 = x[4*i];
635     alpha2 = x[4*i+1];
636     alpha3 = x[4*i+2];
637     alpha4 = x[4*i+3];
638     while (n-->0) {
639       y[4*(*idx)]   += alpha1*(*v);
640       y[4*(*idx)+1] += alpha2*(*v);
641       y[4*(*idx)+2] += alpha3*(*v);
642       y[4*(*idx)+3] += alpha4*(*v);
643       idx++; v++;
644     }
645   }
646   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
647   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
648   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 /* ------------------------------------------------------------------------------*/
652 
653 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
654 {
655   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
656   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
657   const PetscScalar *x,*v;
658   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
659   PetscErrorCode    ierr;
660   PetscInt          nonzerorow=0,n,i,jrow,j;
661   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
662 
663   PetscFunctionBegin;
664   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
665   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
666   idx  = a->j;
667   v    = a->a;
668   ii   = a->i;
669 
670   for (i=0; i<m; i++) {
671     jrow  = ii[i];
672     n     = ii[i+1] - jrow;
673     sum1  = 0.0;
674     sum2  = 0.0;
675     sum3  = 0.0;
676     sum4  = 0.0;
677     sum5  = 0.0;
678 
679     nonzerorow += (n>0);
680     for (j=0; j<n; j++) {
681       sum1 += v[jrow]*x[5*idx[jrow]];
682       sum2 += v[jrow]*x[5*idx[jrow]+1];
683       sum3 += v[jrow]*x[5*idx[jrow]+2];
684       sum4 += v[jrow]*x[5*idx[jrow]+3];
685       sum5 += v[jrow]*x[5*idx[jrow]+4];
686       jrow++;
687     }
688     y[5*i]   = sum1;
689     y[5*i+1] = sum2;
690     y[5*i+2] = sum3;
691     y[5*i+3] = sum4;
692     y[5*i+4] = sum5;
693   }
694 
695   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
696   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
697   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
702 {
703   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
704   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
705   const PetscScalar *x,*v;
706   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
707   PetscErrorCode    ierr;
708   PetscInt          n,i;
709   const PetscInt    m = b->AIJ->rmap->n,*idx;
710 
711   PetscFunctionBegin;
712   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
713   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
714   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
715 
716   for (i=0; i<m; i++) {
717     idx    = a->j + a->i[i];
718     v      = a->a + a->i[i];
719     n      = a->i[i+1] - a->i[i];
720     alpha1 = x[5*i];
721     alpha2 = x[5*i+1];
722     alpha3 = x[5*i+2];
723     alpha4 = x[5*i+3];
724     alpha5 = x[5*i+4];
725     while (n-->0) {
726       y[5*(*idx)]   += alpha1*(*v);
727       y[5*(*idx)+1] += alpha2*(*v);
728       y[5*(*idx)+2] += alpha3*(*v);
729       y[5*(*idx)+3] += alpha4*(*v);
730       y[5*(*idx)+4] += alpha5*(*v);
731       idx++; v++;
732     }
733   }
734   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
735   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
736   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
741 {
742   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
743   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
744   const PetscScalar *x,*v;
745   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
746   PetscErrorCode    ierr;
747   PetscInt          n,i,jrow,j;
748   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
749 
750   PetscFunctionBegin;
751   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
752   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
753   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
754   idx  = a->j;
755   v    = a->a;
756   ii   = a->i;
757 
758   for (i=0; i<m; i++) {
759     jrow = ii[i];
760     n    = ii[i+1] - jrow;
761     sum1 = 0.0;
762     sum2 = 0.0;
763     sum3 = 0.0;
764     sum4 = 0.0;
765     sum5 = 0.0;
766     for (j=0; j<n; j++) {
767       sum1 += v[jrow]*x[5*idx[jrow]];
768       sum2 += v[jrow]*x[5*idx[jrow]+1];
769       sum3 += v[jrow]*x[5*idx[jrow]+2];
770       sum4 += v[jrow]*x[5*idx[jrow]+3];
771       sum5 += v[jrow]*x[5*idx[jrow]+4];
772       jrow++;
773     }
774     y[5*i]   += sum1;
775     y[5*i+1] += sum2;
776     y[5*i+2] += sum3;
777     y[5*i+3] += sum4;
778     y[5*i+4] += sum5;
779   }
780 
781   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
782   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
783   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
784   PetscFunctionReturn(0);
785 }
786 
787 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788 {
789   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
790   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
791   const PetscScalar *x,*v;
792   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
793   PetscErrorCode    ierr;
794   PetscInt          n,i;
795   const PetscInt    m = b->AIJ->rmap->n,*idx;
796 
797   PetscFunctionBegin;
798   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
799   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
800   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
801 
802   for (i=0; i<m; i++) {
803     idx    = a->j + a->i[i];
804     v      = a->a + a->i[i];
805     n      = a->i[i+1] - a->i[i];
806     alpha1 = x[5*i];
807     alpha2 = x[5*i+1];
808     alpha3 = x[5*i+2];
809     alpha4 = x[5*i+3];
810     alpha5 = x[5*i+4];
811     while (n-->0) {
812       y[5*(*idx)]   += alpha1*(*v);
813       y[5*(*idx)+1] += alpha2*(*v);
814       y[5*(*idx)+2] += alpha3*(*v);
815       y[5*(*idx)+3] += alpha4*(*v);
816       y[5*(*idx)+4] += alpha5*(*v);
817       idx++; v++;
818     }
819   }
820   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
821   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
822   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 
826 /* ------------------------------------------------------------------------------*/
827 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
828 {
829   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
830   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
831   const PetscScalar *x,*v;
832   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
833   PetscErrorCode    ierr;
834   PetscInt          nonzerorow=0,n,i,jrow,j;
835   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
836 
837   PetscFunctionBegin;
838   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
839   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
840   idx  = a->j;
841   v    = a->a;
842   ii   = a->i;
843 
844   for (i=0; i<m; i++) {
845     jrow  = ii[i];
846     n     = ii[i+1] - jrow;
847     sum1  = 0.0;
848     sum2  = 0.0;
849     sum3  = 0.0;
850     sum4  = 0.0;
851     sum5  = 0.0;
852     sum6  = 0.0;
853 
854     nonzerorow += (n>0);
855     for (j=0; j<n; j++) {
856       sum1 += v[jrow]*x[6*idx[jrow]];
857       sum2 += v[jrow]*x[6*idx[jrow]+1];
858       sum3 += v[jrow]*x[6*idx[jrow]+2];
859       sum4 += v[jrow]*x[6*idx[jrow]+3];
860       sum5 += v[jrow]*x[6*idx[jrow]+4];
861       sum6 += v[jrow]*x[6*idx[jrow]+5];
862       jrow++;
863     }
864     y[6*i]   = sum1;
865     y[6*i+1] = sum2;
866     y[6*i+2] = sum3;
867     y[6*i+3] = sum4;
868     y[6*i+4] = sum5;
869     y[6*i+5] = sum6;
870   }
871 
872   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
873   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
874   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879 {
880   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
881   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
882   const PetscScalar *x,*v;
883   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
884   PetscErrorCode    ierr;
885   PetscInt          n,i;
886   const PetscInt    m = b->AIJ->rmap->n,*idx;
887 
888   PetscFunctionBegin;
889   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
890   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
891   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
892 
893   for (i=0; i<m; i++) {
894     idx    = a->j + a->i[i];
895     v      = a->a + a->i[i];
896     n      = a->i[i+1] - a->i[i];
897     alpha1 = x[6*i];
898     alpha2 = x[6*i+1];
899     alpha3 = x[6*i+2];
900     alpha4 = x[6*i+3];
901     alpha5 = x[6*i+4];
902     alpha6 = x[6*i+5];
903     while (n-->0) {
904       y[6*(*idx)]   += alpha1*(*v);
905       y[6*(*idx)+1] += alpha2*(*v);
906       y[6*(*idx)+2] += alpha3*(*v);
907       y[6*(*idx)+3] += alpha4*(*v);
908       y[6*(*idx)+4] += alpha5*(*v);
909       y[6*(*idx)+5] += alpha6*(*v);
910       idx++; v++;
911     }
912   }
913   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
914   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
915   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
920 {
921   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
922   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
923   const PetscScalar *x,*v;
924   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
925   PetscErrorCode    ierr;
926   PetscInt          n,i,jrow,j;
927   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
928 
929   PetscFunctionBegin;
930   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
931   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
932   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
933   idx  = a->j;
934   v    = a->a;
935   ii   = a->i;
936 
937   for (i=0; i<m; i++) {
938     jrow = ii[i];
939     n    = ii[i+1] - jrow;
940     sum1 = 0.0;
941     sum2 = 0.0;
942     sum3 = 0.0;
943     sum4 = 0.0;
944     sum5 = 0.0;
945     sum6 = 0.0;
946     for (j=0; j<n; j++) {
947       sum1 += v[jrow]*x[6*idx[jrow]];
948       sum2 += v[jrow]*x[6*idx[jrow]+1];
949       sum3 += v[jrow]*x[6*idx[jrow]+2];
950       sum4 += v[jrow]*x[6*idx[jrow]+3];
951       sum5 += v[jrow]*x[6*idx[jrow]+4];
952       sum6 += v[jrow]*x[6*idx[jrow]+5];
953       jrow++;
954     }
955     y[6*i]   += sum1;
956     y[6*i+1] += sum2;
957     y[6*i+2] += sum3;
958     y[6*i+3] += sum4;
959     y[6*i+4] += sum5;
960     y[6*i+5] += sum6;
961   }
962 
963   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
964   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
965   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
970 {
971   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
972   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
973   const PetscScalar *x,*v;
974   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
975   PetscErrorCode    ierr;
976   PetscInt          n,i;
977   const PetscInt    m = b->AIJ->rmap->n,*idx;
978 
979   PetscFunctionBegin;
980   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
981   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
982   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
983 
984   for (i=0; i<m; i++) {
985     idx    = a->j + a->i[i];
986     v      = a->a + a->i[i];
987     n      = a->i[i+1] - a->i[i];
988     alpha1 = x[6*i];
989     alpha2 = x[6*i+1];
990     alpha3 = x[6*i+2];
991     alpha4 = x[6*i+3];
992     alpha5 = x[6*i+4];
993     alpha6 = x[6*i+5];
994     while (n-->0) {
995       y[6*(*idx)]   += alpha1*(*v);
996       y[6*(*idx)+1] += alpha2*(*v);
997       y[6*(*idx)+2] += alpha3*(*v);
998       y[6*(*idx)+3] += alpha4*(*v);
999       y[6*(*idx)+4] += alpha5*(*v);
1000       y[6*(*idx)+5] += alpha6*(*v);
1001       idx++; v++;
1002     }
1003   }
1004   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1005   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1006   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 /* ------------------------------------------------------------------------------*/
1011 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1014   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015   const PetscScalar *x,*v;
1016   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1017   PetscErrorCode    ierr;
1018   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1019   PetscInt          nonzerorow=0,n,i,jrow,j;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1023   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1024   idx  = a->j;
1025   v    = a->a;
1026   ii   = a->i;
1027 
1028   for (i=0; i<m; i++) {
1029     jrow  = ii[i];
1030     n     = ii[i+1] - jrow;
1031     sum1  = 0.0;
1032     sum2  = 0.0;
1033     sum3  = 0.0;
1034     sum4  = 0.0;
1035     sum5  = 0.0;
1036     sum6  = 0.0;
1037     sum7  = 0.0;
1038 
1039     nonzerorow += (n>0);
1040     for (j=0; j<n; j++) {
1041       sum1 += v[jrow]*x[7*idx[jrow]];
1042       sum2 += v[jrow]*x[7*idx[jrow]+1];
1043       sum3 += v[jrow]*x[7*idx[jrow]+2];
1044       sum4 += v[jrow]*x[7*idx[jrow]+3];
1045       sum5 += v[jrow]*x[7*idx[jrow]+4];
1046       sum6 += v[jrow]*x[7*idx[jrow]+5];
1047       sum7 += v[jrow]*x[7*idx[jrow]+6];
1048       jrow++;
1049     }
1050     y[7*i]   = sum1;
1051     y[7*i+1] = sum2;
1052     y[7*i+2] = sum3;
1053     y[7*i+3] = sum4;
1054     y[7*i+4] = sum5;
1055     y[7*i+5] = sum6;
1056     y[7*i+6] = sum7;
1057   }
1058 
1059   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1060   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1061   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1066 {
1067   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1068   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1069   const PetscScalar *x,*v;
1070   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1071   PetscErrorCode    ierr;
1072   const PetscInt    m = b->AIJ->rmap->n,*idx;
1073   PetscInt          n,i;
1074 
1075   PetscFunctionBegin;
1076   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1077   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1078   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1079 
1080   for (i=0; i<m; i++) {
1081     idx    = a->j + a->i[i];
1082     v      = a->a + a->i[i];
1083     n      = a->i[i+1] - a->i[i];
1084     alpha1 = x[7*i];
1085     alpha2 = x[7*i+1];
1086     alpha3 = x[7*i+2];
1087     alpha4 = x[7*i+3];
1088     alpha5 = x[7*i+4];
1089     alpha6 = x[7*i+5];
1090     alpha7 = x[7*i+6];
1091     while (n-->0) {
1092       y[7*(*idx)]   += alpha1*(*v);
1093       y[7*(*idx)+1] += alpha2*(*v);
1094       y[7*(*idx)+2] += alpha3*(*v);
1095       y[7*(*idx)+3] += alpha4*(*v);
1096       y[7*(*idx)+4] += alpha5*(*v);
1097       y[7*(*idx)+5] += alpha6*(*v);
1098       y[7*(*idx)+6] += alpha7*(*v);
1099       idx++; v++;
1100     }
1101   }
1102   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1103   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1104   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1111   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1112   const PetscScalar *x,*v;
1113   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1114   PetscErrorCode    ierr;
1115   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1116   PetscInt          n,i,jrow,j;
1117 
1118   PetscFunctionBegin;
1119   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1120   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1121   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1122   idx  = a->j;
1123   v    = a->a;
1124   ii   = a->i;
1125 
1126   for (i=0; i<m; i++) {
1127     jrow = ii[i];
1128     n    = ii[i+1] - jrow;
1129     sum1 = 0.0;
1130     sum2 = 0.0;
1131     sum3 = 0.0;
1132     sum4 = 0.0;
1133     sum5 = 0.0;
1134     sum6 = 0.0;
1135     sum7 = 0.0;
1136     for (j=0; j<n; j++) {
1137       sum1 += v[jrow]*x[7*idx[jrow]];
1138       sum2 += v[jrow]*x[7*idx[jrow]+1];
1139       sum3 += v[jrow]*x[7*idx[jrow]+2];
1140       sum4 += v[jrow]*x[7*idx[jrow]+3];
1141       sum5 += v[jrow]*x[7*idx[jrow]+4];
1142       sum6 += v[jrow]*x[7*idx[jrow]+5];
1143       sum7 += v[jrow]*x[7*idx[jrow]+6];
1144       jrow++;
1145     }
1146     y[7*i]   += sum1;
1147     y[7*i+1] += sum2;
1148     y[7*i+2] += sum3;
1149     y[7*i+3] += sum4;
1150     y[7*i+4] += sum5;
1151     y[7*i+5] += sum6;
1152     y[7*i+6] += sum7;
1153   }
1154 
1155   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1156   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1157   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1162 {
1163   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1164   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1165   const PetscScalar *x,*v;
1166   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1167   PetscErrorCode    ierr;
1168   const PetscInt    m = b->AIJ->rmap->n,*idx;
1169   PetscInt          n,i;
1170 
1171   PetscFunctionBegin;
1172   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1173   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1174   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1175   for (i=0; i<m; i++) {
1176     idx    = a->j + a->i[i];
1177     v      = a->a + a->i[i];
1178     n      = a->i[i+1] - a->i[i];
1179     alpha1 = x[7*i];
1180     alpha2 = x[7*i+1];
1181     alpha3 = x[7*i+2];
1182     alpha4 = x[7*i+3];
1183     alpha5 = x[7*i+4];
1184     alpha6 = x[7*i+5];
1185     alpha7 = x[7*i+6];
1186     while (n-->0) {
1187       y[7*(*idx)]   += alpha1*(*v);
1188       y[7*(*idx)+1] += alpha2*(*v);
1189       y[7*(*idx)+2] += alpha3*(*v);
1190       y[7*(*idx)+3] += alpha4*(*v);
1191       y[7*(*idx)+4] += alpha5*(*v);
1192       y[7*(*idx)+5] += alpha6*(*v);
1193       y[7*(*idx)+6] += alpha7*(*v);
1194       idx++; v++;
1195     }
1196   }
1197   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1198   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1204 {
1205   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1206   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1207   const PetscScalar *x,*v;
1208   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1209   PetscErrorCode    ierr;
1210   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1211   PetscInt          nonzerorow=0,n,i,jrow,j;
1212 
1213   PetscFunctionBegin;
1214   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1215   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1216   idx  = a->j;
1217   v    = a->a;
1218   ii   = a->i;
1219 
1220   for (i=0; i<m; i++) {
1221     jrow  = ii[i];
1222     n     = ii[i+1] - jrow;
1223     sum1  = 0.0;
1224     sum2  = 0.0;
1225     sum3  = 0.0;
1226     sum4  = 0.0;
1227     sum5  = 0.0;
1228     sum6  = 0.0;
1229     sum7  = 0.0;
1230     sum8  = 0.0;
1231 
1232     nonzerorow += (n>0);
1233     for (j=0; j<n; j++) {
1234       sum1 += v[jrow]*x[8*idx[jrow]];
1235       sum2 += v[jrow]*x[8*idx[jrow]+1];
1236       sum3 += v[jrow]*x[8*idx[jrow]+2];
1237       sum4 += v[jrow]*x[8*idx[jrow]+3];
1238       sum5 += v[jrow]*x[8*idx[jrow]+4];
1239       sum6 += v[jrow]*x[8*idx[jrow]+5];
1240       sum7 += v[jrow]*x[8*idx[jrow]+6];
1241       sum8 += v[jrow]*x[8*idx[jrow]+7];
1242       jrow++;
1243     }
1244     y[8*i]   = sum1;
1245     y[8*i+1] = sum2;
1246     y[8*i+2] = sum3;
1247     y[8*i+3] = sum4;
1248     y[8*i+4] = sum5;
1249     y[8*i+5] = sum6;
1250     y[8*i+6] = sum7;
1251     y[8*i+7] = sum8;
1252   }
1253 
1254   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1255   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1256   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1261 {
1262   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1263   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1264   const PetscScalar *x,*v;
1265   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1266   PetscErrorCode    ierr;
1267   const PetscInt    m = b->AIJ->rmap->n,*idx;
1268   PetscInt          n,i;
1269 
1270   PetscFunctionBegin;
1271   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1272   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1273   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1274 
1275   for (i=0; i<m; i++) {
1276     idx    = a->j + a->i[i];
1277     v      = a->a + a->i[i];
1278     n      = a->i[i+1] - a->i[i];
1279     alpha1 = x[8*i];
1280     alpha2 = x[8*i+1];
1281     alpha3 = x[8*i+2];
1282     alpha4 = x[8*i+3];
1283     alpha5 = x[8*i+4];
1284     alpha6 = x[8*i+5];
1285     alpha7 = x[8*i+6];
1286     alpha8 = x[8*i+7];
1287     while (n-->0) {
1288       y[8*(*idx)]   += alpha1*(*v);
1289       y[8*(*idx)+1] += alpha2*(*v);
1290       y[8*(*idx)+2] += alpha3*(*v);
1291       y[8*(*idx)+3] += alpha4*(*v);
1292       y[8*(*idx)+4] += alpha5*(*v);
1293       y[8*(*idx)+5] += alpha6*(*v);
1294       y[8*(*idx)+6] += alpha7*(*v);
1295       y[8*(*idx)+7] += alpha8*(*v);
1296       idx++; v++;
1297     }
1298   }
1299   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1300   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1301   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1306 {
1307   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1308   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1309   const PetscScalar *x,*v;
1310   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1311   PetscErrorCode    ierr;
1312   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1313   PetscInt          n,i,jrow,j;
1314 
1315   PetscFunctionBegin;
1316   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1317   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1318   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1319   idx  = a->j;
1320   v    = a->a;
1321   ii   = a->i;
1322 
1323   for (i=0; i<m; i++) {
1324     jrow = ii[i];
1325     n    = ii[i+1] - jrow;
1326     sum1 = 0.0;
1327     sum2 = 0.0;
1328     sum3 = 0.0;
1329     sum4 = 0.0;
1330     sum5 = 0.0;
1331     sum6 = 0.0;
1332     sum7 = 0.0;
1333     sum8 = 0.0;
1334     for (j=0; j<n; j++) {
1335       sum1 += v[jrow]*x[8*idx[jrow]];
1336       sum2 += v[jrow]*x[8*idx[jrow]+1];
1337       sum3 += v[jrow]*x[8*idx[jrow]+2];
1338       sum4 += v[jrow]*x[8*idx[jrow]+3];
1339       sum5 += v[jrow]*x[8*idx[jrow]+4];
1340       sum6 += v[jrow]*x[8*idx[jrow]+5];
1341       sum7 += v[jrow]*x[8*idx[jrow]+6];
1342       sum8 += v[jrow]*x[8*idx[jrow]+7];
1343       jrow++;
1344     }
1345     y[8*i]   += sum1;
1346     y[8*i+1] += sum2;
1347     y[8*i+2] += sum3;
1348     y[8*i+3] += sum4;
1349     y[8*i+4] += sum5;
1350     y[8*i+5] += sum6;
1351     y[8*i+6] += sum7;
1352     y[8*i+7] += sum8;
1353   }
1354 
1355   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1356   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1357   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1362 {
1363   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1364   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1365   const PetscScalar *x,*v;
1366   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1367   PetscErrorCode    ierr;
1368   const PetscInt    m = b->AIJ->rmap->n,*idx;
1369   PetscInt          n,i;
1370 
1371   PetscFunctionBegin;
1372   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1373   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1374   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1375   for (i=0; i<m; i++) {
1376     idx    = a->j + a->i[i];
1377     v      = a->a + a->i[i];
1378     n      = a->i[i+1] - a->i[i];
1379     alpha1 = x[8*i];
1380     alpha2 = x[8*i+1];
1381     alpha3 = x[8*i+2];
1382     alpha4 = x[8*i+3];
1383     alpha5 = x[8*i+4];
1384     alpha6 = x[8*i+5];
1385     alpha7 = x[8*i+6];
1386     alpha8 = x[8*i+7];
1387     while (n-->0) {
1388       y[8*(*idx)]   += alpha1*(*v);
1389       y[8*(*idx)+1] += alpha2*(*v);
1390       y[8*(*idx)+2] += alpha3*(*v);
1391       y[8*(*idx)+3] += alpha4*(*v);
1392       y[8*(*idx)+4] += alpha5*(*v);
1393       y[8*(*idx)+5] += alpha6*(*v);
1394       y[8*(*idx)+6] += alpha7*(*v);
1395       y[8*(*idx)+7] += alpha8*(*v);
1396       idx++; v++;
1397     }
1398   }
1399   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1400   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1401   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 /* ------------------------------------------------------------------------------*/
1406 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1407 {
1408   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1409   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1410   const PetscScalar *x,*v;
1411   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1412   PetscErrorCode    ierr;
1413   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1414   PetscInt          nonzerorow=0,n,i,jrow,j;
1415 
1416   PetscFunctionBegin;
1417   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1418   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1419   idx  = a->j;
1420   v    = a->a;
1421   ii   = a->i;
1422 
1423   for (i=0; i<m; i++) {
1424     jrow  = ii[i];
1425     n     = ii[i+1] - jrow;
1426     sum1  = 0.0;
1427     sum2  = 0.0;
1428     sum3  = 0.0;
1429     sum4  = 0.0;
1430     sum5  = 0.0;
1431     sum6  = 0.0;
1432     sum7  = 0.0;
1433     sum8  = 0.0;
1434     sum9  = 0.0;
1435 
1436     nonzerorow += (n>0);
1437     for (j=0; j<n; j++) {
1438       sum1 += v[jrow]*x[9*idx[jrow]];
1439       sum2 += v[jrow]*x[9*idx[jrow]+1];
1440       sum3 += v[jrow]*x[9*idx[jrow]+2];
1441       sum4 += v[jrow]*x[9*idx[jrow]+3];
1442       sum5 += v[jrow]*x[9*idx[jrow]+4];
1443       sum6 += v[jrow]*x[9*idx[jrow]+5];
1444       sum7 += v[jrow]*x[9*idx[jrow]+6];
1445       sum8 += v[jrow]*x[9*idx[jrow]+7];
1446       sum9 += v[jrow]*x[9*idx[jrow]+8];
1447       jrow++;
1448     }
1449     y[9*i]   = sum1;
1450     y[9*i+1] = sum2;
1451     y[9*i+2] = sum3;
1452     y[9*i+3] = sum4;
1453     y[9*i+4] = sum5;
1454     y[9*i+5] = sum6;
1455     y[9*i+6] = sum7;
1456     y[9*i+7] = sum8;
1457     y[9*i+8] = sum9;
1458   }
1459 
1460   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1461   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1462   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 /* ------------------------------------------------------------------------------*/
1467 
1468 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1469 {
1470   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1471   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1472   const PetscScalar *x,*v;
1473   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1474   PetscErrorCode    ierr;
1475   const PetscInt    m = b->AIJ->rmap->n,*idx;
1476   PetscInt          n,i;
1477 
1478   PetscFunctionBegin;
1479   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1480   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1481   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1482 
1483   for (i=0; i<m; i++) {
1484     idx    = a->j + a->i[i];
1485     v      = a->a + a->i[i];
1486     n      = a->i[i+1] - a->i[i];
1487     alpha1 = x[9*i];
1488     alpha2 = x[9*i+1];
1489     alpha3 = x[9*i+2];
1490     alpha4 = x[9*i+3];
1491     alpha5 = x[9*i+4];
1492     alpha6 = x[9*i+5];
1493     alpha7 = x[9*i+6];
1494     alpha8 = x[9*i+7];
1495     alpha9 = x[9*i+8];
1496     while (n-->0) {
1497       y[9*(*idx)]   += alpha1*(*v);
1498       y[9*(*idx)+1] += alpha2*(*v);
1499       y[9*(*idx)+2] += alpha3*(*v);
1500       y[9*(*idx)+3] += alpha4*(*v);
1501       y[9*(*idx)+4] += alpha5*(*v);
1502       y[9*(*idx)+5] += alpha6*(*v);
1503       y[9*(*idx)+6] += alpha7*(*v);
1504       y[9*(*idx)+7] += alpha8*(*v);
1505       y[9*(*idx)+8] += alpha9*(*v);
1506       idx++; v++;
1507     }
1508   }
1509   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1510   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1511   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1516 {
1517   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1518   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1519   const PetscScalar *x,*v;
1520   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1521   PetscErrorCode    ierr;
1522   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1523   PetscInt          n,i,jrow,j;
1524 
1525   PetscFunctionBegin;
1526   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1527   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1528   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1529   idx  = a->j;
1530   v    = a->a;
1531   ii   = a->i;
1532 
1533   for (i=0; i<m; i++) {
1534     jrow = ii[i];
1535     n    = ii[i+1] - jrow;
1536     sum1 = 0.0;
1537     sum2 = 0.0;
1538     sum3 = 0.0;
1539     sum4 = 0.0;
1540     sum5 = 0.0;
1541     sum6 = 0.0;
1542     sum7 = 0.0;
1543     sum8 = 0.0;
1544     sum9 = 0.0;
1545     for (j=0; j<n; j++) {
1546       sum1 += v[jrow]*x[9*idx[jrow]];
1547       sum2 += v[jrow]*x[9*idx[jrow]+1];
1548       sum3 += v[jrow]*x[9*idx[jrow]+2];
1549       sum4 += v[jrow]*x[9*idx[jrow]+3];
1550       sum5 += v[jrow]*x[9*idx[jrow]+4];
1551       sum6 += v[jrow]*x[9*idx[jrow]+5];
1552       sum7 += v[jrow]*x[9*idx[jrow]+6];
1553       sum8 += v[jrow]*x[9*idx[jrow]+7];
1554       sum9 += v[jrow]*x[9*idx[jrow]+8];
1555       jrow++;
1556     }
1557     y[9*i]   += sum1;
1558     y[9*i+1] += sum2;
1559     y[9*i+2] += sum3;
1560     y[9*i+3] += sum4;
1561     y[9*i+4] += sum5;
1562     y[9*i+5] += sum6;
1563     y[9*i+6] += sum7;
1564     y[9*i+7] += sum8;
1565     y[9*i+8] += sum9;
1566   }
1567 
1568   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1569   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1570   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1575 {
1576   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1577   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1578   const PetscScalar *x,*v;
1579   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1580   PetscErrorCode    ierr;
1581   const PetscInt    m = b->AIJ->rmap->n,*idx;
1582   PetscInt          n,i;
1583 
1584   PetscFunctionBegin;
1585   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1586   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1587   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1588   for (i=0; i<m; i++) {
1589     idx    = a->j + a->i[i];
1590     v      = a->a + a->i[i];
1591     n      = a->i[i+1] - a->i[i];
1592     alpha1 = x[9*i];
1593     alpha2 = x[9*i+1];
1594     alpha3 = x[9*i+2];
1595     alpha4 = x[9*i+3];
1596     alpha5 = x[9*i+4];
1597     alpha6 = x[9*i+5];
1598     alpha7 = x[9*i+6];
1599     alpha8 = x[9*i+7];
1600     alpha9 = x[9*i+8];
1601     while (n-->0) {
1602       y[9*(*idx)]   += alpha1*(*v);
1603       y[9*(*idx)+1] += alpha2*(*v);
1604       y[9*(*idx)+2] += alpha3*(*v);
1605       y[9*(*idx)+3] += alpha4*(*v);
1606       y[9*(*idx)+4] += alpha5*(*v);
1607       y[9*(*idx)+5] += alpha6*(*v);
1608       y[9*(*idx)+6] += alpha7*(*v);
1609       y[9*(*idx)+7] += alpha8*(*v);
1610       y[9*(*idx)+8] += alpha9*(*v);
1611       idx++; v++;
1612     }
1613   }
1614   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1615   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1616   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1617   PetscFunctionReturn(0);
1618 }
1619 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1620 {
1621   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1622   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1623   const PetscScalar *x,*v;
1624   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1625   PetscErrorCode    ierr;
1626   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1627   PetscInt          nonzerorow=0,n,i,jrow,j;
1628 
1629   PetscFunctionBegin;
1630   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1631   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1632   idx  = a->j;
1633   v    = a->a;
1634   ii   = a->i;
1635 
1636   for (i=0; i<m; i++) {
1637     jrow  = ii[i];
1638     n     = ii[i+1] - jrow;
1639     sum1  = 0.0;
1640     sum2  = 0.0;
1641     sum3  = 0.0;
1642     sum4  = 0.0;
1643     sum5  = 0.0;
1644     sum6  = 0.0;
1645     sum7  = 0.0;
1646     sum8  = 0.0;
1647     sum9  = 0.0;
1648     sum10 = 0.0;
1649 
1650     nonzerorow += (n>0);
1651     for (j=0; j<n; j++) {
1652       sum1  += v[jrow]*x[10*idx[jrow]];
1653       sum2  += v[jrow]*x[10*idx[jrow]+1];
1654       sum3  += v[jrow]*x[10*idx[jrow]+2];
1655       sum4  += v[jrow]*x[10*idx[jrow]+3];
1656       sum5  += v[jrow]*x[10*idx[jrow]+4];
1657       sum6  += v[jrow]*x[10*idx[jrow]+5];
1658       sum7  += v[jrow]*x[10*idx[jrow]+6];
1659       sum8  += v[jrow]*x[10*idx[jrow]+7];
1660       sum9  += v[jrow]*x[10*idx[jrow]+8];
1661       sum10 += v[jrow]*x[10*idx[jrow]+9];
1662       jrow++;
1663     }
1664     y[10*i]   = sum1;
1665     y[10*i+1] = sum2;
1666     y[10*i+2] = sum3;
1667     y[10*i+3] = sum4;
1668     y[10*i+4] = sum5;
1669     y[10*i+5] = sum6;
1670     y[10*i+6] = sum7;
1671     y[10*i+7] = sum8;
1672     y[10*i+8] = sum9;
1673     y[10*i+9] = sum10;
1674   }
1675 
1676   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1677   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1678   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1683 {
1684   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1685   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1686   const PetscScalar *x,*v;
1687   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1688   PetscErrorCode    ierr;
1689   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1690   PetscInt          n,i,jrow,j;
1691 
1692   PetscFunctionBegin;
1693   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1694   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1695   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1696   idx  = a->j;
1697   v    = a->a;
1698   ii   = a->i;
1699 
1700   for (i=0; i<m; i++) {
1701     jrow  = ii[i];
1702     n     = ii[i+1] - jrow;
1703     sum1  = 0.0;
1704     sum2  = 0.0;
1705     sum3  = 0.0;
1706     sum4  = 0.0;
1707     sum5  = 0.0;
1708     sum6  = 0.0;
1709     sum7  = 0.0;
1710     sum8  = 0.0;
1711     sum9  = 0.0;
1712     sum10 = 0.0;
1713     for (j=0; j<n; j++) {
1714       sum1  += v[jrow]*x[10*idx[jrow]];
1715       sum2  += v[jrow]*x[10*idx[jrow]+1];
1716       sum3  += v[jrow]*x[10*idx[jrow]+2];
1717       sum4  += v[jrow]*x[10*idx[jrow]+3];
1718       sum5  += v[jrow]*x[10*idx[jrow]+4];
1719       sum6  += v[jrow]*x[10*idx[jrow]+5];
1720       sum7  += v[jrow]*x[10*idx[jrow]+6];
1721       sum8  += v[jrow]*x[10*idx[jrow]+7];
1722       sum9  += v[jrow]*x[10*idx[jrow]+8];
1723       sum10 += v[jrow]*x[10*idx[jrow]+9];
1724       jrow++;
1725     }
1726     y[10*i]   += sum1;
1727     y[10*i+1] += sum2;
1728     y[10*i+2] += sum3;
1729     y[10*i+3] += sum4;
1730     y[10*i+4] += sum5;
1731     y[10*i+5] += sum6;
1732     y[10*i+6] += sum7;
1733     y[10*i+7] += sum8;
1734     y[10*i+8] += sum9;
1735     y[10*i+9] += sum10;
1736   }
1737 
1738   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1739   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1740   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1745 {
1746   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1747   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1748   const PetscScalar *x,*v;
1749   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1750   PetscErrorCode    ierr;
1751   const PetscInt    m = b->AIJ->rmap->n,*idx;
1752   PetscInt          n,i;
1753 
1754   PetscFunctionBegin;
1755   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1756   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1757   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1758 
1759   for (i=0; i<m; i++) {
1760     idx     = a->j + a->i[i];
1761     v       = a->a + a->i[i];
1762     n       = a->i[i+1] - a->i[i];
1763     alpha1  = x[10*i];
1764     alpha2  = x[10*i+1];
1765     alpha3  = x[10*i+2];
1766     alpha4  = x[10*i+3];
1767     alpha5  = x[10*i+4];
1768     alpha6  = x[10*i+5];
1769     alpha7  = x[10*i+6];
1770     alpha8  = x[10*i+7];
1771     alpha9  = x[10*i+8];
1772     alpha10 = x[10*i+9];
1773     while (n-->0) {
1774       y[10*(*idx)]   += alpha1*(*v);
1775       y[10*(*idx)+1] += alpha2*(*v);
1776       y[10*(*idx)+2] += alpha3*(*v);
1777       y[10*(*idx)+3] += alpha4*(*v);
1778       y[10*(*idx)+4] += alpha5*(*v);
1779       y[10*(*idx)+5] += alpha6*(*v);
1780       y[10*(*idx)+6] += alpha7*(*v);
1781       y[10*(*idx)+7] += alpha8*(*v);
1782       y[10*(*idx)+8] += alpha9*(*v);
1783       y[10*(*idx)+9] += alpha10*(*v);
1784       idx++; v++;
1785     }
1786   }
1787   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1788   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1789   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1794 {
1795   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1796   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1797   const PetscScalar *x,*v;
1798   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1799   PetscErrorCode    ierr;
1800   const PetscInt    m = b->AIJ->rmap->n,*idx;
1801   PetscInt          n,i;
1802 
1803   PetscFunctionBegin;
1804   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1805   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1806   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1807   for (i=0; i<m; i++) {
1808     idx     = a->j + a->i[i];
1809     v       = a->a + a->i[i];
1810     n       = a->i[i+1] - a->i[i];
1811     alpha1  = x[10*i];
1812     alpha2  = x[10*i+1];
1813     alpha3  = x[10*i+2];
1814     alpha4  = x[10*i+3];
1815     alpha5  = x[10*i+4];
1816     alpha6  = x[10*i+5];
1817     alpha7  = x[10*i+6];
1818     alpha8  = x[10*i+7];
1819     alpha9  = x[10*i+8];
1820     alpha10 = x[10*i+9];
1821     while (n-->0) {
1822       y[10*(*idx)]   += alpha1*(*v);
1823       y[10*(*idx)+1] += alpha2*(*v);
1824       y[10*(*idx)+2] += alpha3*(*v);
1825       y[10*(*idx)+3] += alpha4*(*v);
1826       y[10*(*idx)+4] += alpha5*(*v);
1827       y[10*(*idx)+5] += alpha6*(*v);
1828       y[10*(*idx)+6] += alpha7*(*v);
1829       y[10*(*idx)+7] += alpha8*(*v);
1830       y[10*(*idx)+8] += alpha9*(*v);
1831       y[10*(*idx)+9] += alpha10*(*v);
1832       idx++; v++;
1833     }
1834   }
1835   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1836   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1837   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 
1842 /*--------------------------------------------------------------------------------------------*/
1843 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1844 {
1845   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1846   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1847   const PetscScalar *x,*v;
1848   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1849   PetscErrorCode    ierr;
1850   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1851   PetscInt          nonzerorow=0,n,i,jrow,j;
1852 
1853   PetscFunctionBegin;
1854   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1855   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1856   idx  = a->j;
1857   v    = a->a;
1858   ii   = a->i;
1859 
1860   for (i=0; i<m; i++) {
1861     jrow  = ii[i];
1862     n     = ii[i+1] - jrow;
1863     sum1  = 0.0;
1864     sum2  = 0.0;
1865     sum3  = 0.0;
1866     sum4  = 0.0;
1867     sum5  = 0.0;
1868     sum6  = 0.0;
1869     sum7  = 0.0;
1870     sum8  = 0.0;
1871     sum9  = 0.0;
1872     sum10 = 0.0;
1873     sum11 = 0.0;
1874 
1875     nonzerorow += (n>0);
1876     for (j=0; j<n; j++) {
1877       sum1  += v[jrow]*x[11*idx[jrow]];
1878       sum2  += v[jrow]*x[11*idx[jrow]+1];
1879       sum3  += v[jrow]*x[11*idx[jrow]+2];
1880       sum4  += v[jrow]*x[11*idx[jrow]+3];
1881       sum5  += v[jrow]*x[11*idx[jrow]+4];
1882       sum6  += v[jrow]*x[11*idx[jrow]+5];
1883       sum7  += v[jrow]*x[11*idx[jrow]+6];
1884       sum8  += v[jrow]*x[11*idx[jrow]+7];
1885       sum9  += v[jrow]*x[11*idx[jrow]+8];
1886       sum10 += v[jrow]*x[11*idx[jrow]+9];
1887       sum11 += v[jrow]*x[11*idx[jrow]+10];
1888       jrow++;
1889     }
1890     y[11*i]    = sum1;
1891     y[11*i+1]  = sum2;
1892     y[11*i+2]  = sum3;
1893     y[11*i+3]  = sum4;
1894     y[11*i+4]  = sum5;
1895     y[11*i+5]  = sum6;
1896     y[11*i+6]  = sum7;
1897     y[11*i+7]  = sum8;
1898     y[11*i+8]  = sum9;
1899     y[11*i+9]  = sum10;
1900     y[11*i+10] = sum11;
1901   }
1902 
1903   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1904   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1905   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1910 {
1911   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1912   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1913   const PetscScalar *x,*v;
1914   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1915   PetscErrorCode    ierr;
1916   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1917   PetscInt          n,i,jrow,j;
1918 
1919   PetscFunctionBegin;
1920   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1921   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1922   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1923   idx  = a->j;
1924   v    = a->a;
1925   ii   = a->i;
1926 
1927   for (i=0; i<m; i++) {
1928     jrow  = ii[i];
1929     n     = ii[i+1] - jrow;
1930     sum1  = 0.0;
1931     sum2  = 0.0;
1932     sum3  = 0.0;
1933     sum4  = 0.0;
1934     sum5  = 0.0;
1935     sum6  = 0.0;
1936     sum7  = 0.0;
1937     sum8  = 0.0;
1938     sum9  = 0.0;
1939     sum10 = 0.0;
1940     sum11 = 0.0;
1941     for (j=0; j<n; j++) {
1942       sum1  += v[jrow]*x[11*idx[jrow]];
1943       sum2  += v[jrow]*x[11*idx[jrow]+1];
1944       sum3  += v[jrow]*x[11*idx[jrow]+2];
1945       sum4  += v[jrow]*x[11*idx[jrow]+3];
1946       sum5  += v[jrow]*x[11*idx[jrow]+4];
1947       sum6  += v[jrow]*x[11*idx[jrow]+5];
1948       sum7  += v[jrow]*x[11*idx[jrow]+6];
1949       sum8  += v[jrow]*x[11*idx[jrow]+7];
1950       sum9  += v[jrow]*x[11*idx[jrow]+8];
1951       sum10 += v[jrow]*x[11*idx[jrow]+9];
1952       sum11 += v[jrow]*x[11*idx[jrow]+10];
1953       jrow++;
1954     }
1955     y[11*i]    += sum1;
1956     y[11*i+1]  += sum2;
1957     y[11*i+2]  += sum3;
1958     y[11*i+3]  += sum4;
1959     y[11*i+4]  += sum5;
1960     y[11*i+5]  += sum6;
1961     y[11*i+6]  += sum7;
1962     y[11*i+7]  += sum8;
1963     y[11*i+8]  += sum9;
1964     y[11*i+9]  += sum10;
1965     y[11*i+10] += sum11;
1966   }
1967 
1968   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
1969   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1970   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1975 {
1976   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1977   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1978   const PetscScalar *x,*v;
1979   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1980   PetscErrorCode    ierr;
1981   const PetscInt    m = b->AIJ->rmap->n,*idx;
1982   PetscInt          n,i;
1983 
1984   PetscFunctionBegin;
1985   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1986   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1987   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1988 
1989   for (i=0; i<m; i++) {
1990     idx     = a->j + a->i[i];
1991     v       = a->a + a->i[i];
1992     n       = a->i[i+1] - a->i[i];
1993     alpha1  = x[11*i];
1994     alpha2  = x[11*i+1];
1995     alpha3  = x[11*i+2];
1996     alpha4  = x[11*i+3];
1997     alpha5  = x[11*i+4];
1998     alpha6  = x[11*i+5];
1999     alpha7  = x[11*i+6];
2000     alpha8  = x[11*i+7];
2001     alpha9  = x[11*i+8];
2002     alpha10 = x[11*i+9];
2003     alpha11 = x[11*i+10];
2004     while (n-->0) {
2005       y[11*(*idx)]    += alpha1*(*v);
2006       y[11*(*idx)+1]  += alpha2*(*v);
2007       y[11*(*idx)+2]  += alpha3*(*v);
2008       y[11*(*idx)+3]  += alpha4*(*v);
2009       y[11*(*idx)+4]  += alpha5*(*v);
2010       y[11*(*idx)+5]  += alpha6*(*v);
2011       y[11*(*idx)+6]  += alpha7*(*v);
2012       y[11*(*idx)+7]  += alpha8*(*v);
2013       y[11*(*idx)+8]  += alpha9*(*v);
2014       y[11*(*idx)+9]  += alpha10*(*v);
2015       y[11*(*idx)+10] += alpha11*(*v);
2016       idx++; v++;
2017     }
2018   }
2019   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2020   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2021   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2026 {
2027   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2028   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2029   const PetscScalar *x,*v;
2030   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2031   PetscErrorCode    ierr;
2032   const PetscInt    m = b->AIJ->rmap->n,*idx;
2033   PetscInt          n,i;
2034 
2035   PetscFunctionBegin;
2036   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2037   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2038   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2039   for (i=0; i<m; i++) {
2040     idx     = a->j + a->i[i];
2041     v       = a->a + a->i[i];
2042     n       = a->i[i+1] - a->i[i];
2043     alpha1  = x[11*i];
2044     alpha2  = x[11*i+1];
2045     alpha3  = x[11*i+2];
2046     alpha4  = x[11*i+3];
2047     alpha5  = x[11*i+4];
2048     alpha6  = x[11*i+5];
2049     alpha7  = x[11*i+6];
2050     alpha8  = x[11*i+7];
2051     alpha9  = x[11*i+8];
2052     alpha10 = x[11*i+9];
2053     alpha11 = x[11*i+10];
2054     while (n-->0) {
2055       y[11*(*idx)]    += alpha1*(*v);
2056       y[11*(*idx)+1]  += alpha2*(*v);
2057       y[11*(*idx)+2]  += alpha3*(*v);
2058       y[11*(*idx)+3]  += alpha4*(*v);
2059       y[11*(*idx)+4]  += alpha5*(*v);
2060       y[11*(*idx)+5]  += alpha6*(*v);
2061       y[11*(*idx)+6]  += alpha7*(*v);
2062       y[11*(*idx)+7]  += alpha8*(*v);
2063       y[11*(*idx)+8]  += alpha9*(*v);
2064       y[11*(*idx)+9]  += alpha10*(*v);
2065       y[11*(*idx)+10] += alpha11*(*v);
2066       idx++; v++;
2067     }
2068   }
2069   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2070   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2071   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 
2076 /*--------------------------------------------------------------------------------------------*/
2077 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2078 {
2079   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2080   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2081   const PetscScalar *x,*v;
2082   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2083   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2084   PetscErrorCode    ierr;
2085   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2086   PetscInt          nonzerorow=0,n,i,jrow,j;
2087 
2088   PetscFunctionBegin;
2089   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2090   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2091   idx  = a->j;
2092   v    = a->a;
2093   ii   = a->i;
2094 
2095   for (i=0; i<m; i++) {
2096     jrow  = ii[i];
2097     n     = ii[i+1] - jrow;
2098     sum1  = 0.0;
2099     sum2  = 0.0;
2100     sum3  = 0.0;
2101     sum4  = 0.0;
2102     sum5  = 0.0;
2103     sum6  = 0.0;
2104     sum7  = 0.0;
2105     sum8  = 0.0;
2106     sum9  = 0.0;
2107     sum10 = 0.0;
2108     sum11 = 0.0;
2109     sum12 = 0.0;
2110     sum13 = 0.0;
2111     sum14 = 0.0;
2112     sum15 = 0.0;
2113     sum16 = 0.0;
2114 
2115     nonzerorow += (n>0);
2116     for (j=0; j<n; j++) {
2117       sum1  += v[jrow]*x[16*idx[jrow]];
2118       sum2  += v[jrow]*x[16*idx[jrow]+1];
2119       sum3  += v[jrow]*x[16*idx[jrow]+2];
2120       sum4  += v[jrow]*x[16*idx[jrow]+3];
2121       sum5  += v[jrow]*x[16*idx[jrow]+4];
2122       sum6  += v[jrow]*x[16*idx[jrow]+5];
2123       sum7  += v[jrow]*x[16*idx[jrow]+6];
2124       sum8  += v[jrow]*x[16*idx[jrow]+7];
2125       sum9  += v[jrow]*x[16*idx[jrow]+8];
2126       sum10 += v[jrow]*x[16*idx[jrow]+9];
2127       sum11 += v[jrow]*x[16*idx[jrow]+10];
2128       sum12 += v[jrow]*x[16*idx[jrow]+11];
2129       sum13 += v[jrow]*x[16*idx[jrow]+12];
2130       sum14 += v[jrow]*x[16*idx[jrow]+13];
2131       sum15 += v[jrow]*x[16*idx[jrow]+14];
2132       sum16 += v[jrow]*x[16*idx[jrow]+15];
2133       jrow++;
2134     }
2135     y[16*i]    = sum1;
2136     y[16*i+1]  = sum2;
2137     y[16*i+2]  = sum3;
2138     y[16*i+3]  = sum4;
2139     y[16*i+4]  = sum5;
2140     y[16*i+5]  = sum6;
2141     y[16*i+6]  = sum7;
2142     y[16*i+7]  = sum8;
2143     y[16*i+8]  = sum9;
2144     y[16*i+9]  = sum10;
2145     y[16*i+10] = sum11;
2146     y[16*i+11] = sum12;
2147     y[16*i+12] = sum13;
2148     y[16*i+13] = sum14;
2149     y[16*i+14] = sum15;
2150     y[16*i+15] = sum16;
2151   }
2152 
2153   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2154   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2155   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2160 {
2161   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2162   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2163   const PetscScalar *x,*v;
2164   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2165   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2166   PetscErrorCode    ierr;
2167   const PetscInt    m = b->AIJ->rmap->n,*idx;
2168   PetscInt          n,i;
2169 
2170   PetscFunctionBegin;
2171   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2172   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2173   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2174 
2175   for (i=0; i<m; i++) {
2176     idx     = a->j + a->i[i];
2177     v       = a->a + a->i[i];
2178     n       = a->i[i+1] - a->i[i];
2179     alpha1  = x[16*i];
2180     alpha2  = x[16*i+1];
2181     alpha3  = x[16*i+2];
2182     alpha4  = x[16*i+3];
2183     alpha5  = x[16*i+4];
2184     alpha6  = x[16*i+5];
2185     alpha7  = x[16*i+6];
2186     alpha8  = x[16*i+7];
2187     alpha9  = x[16*i+8];
2188     alpha10 = x[16*i+9];
2189     alpha11 = x[16*i+10];
2190     alpha12 = x[16*i+11];
2191     alpha13 = x[16*i+12];
2192     alpha14 = x[16*i+13];
2193     alpha15 = x[16*i+14];
2194     alpha16 = x[16*i+15];
2195     while (n-->0) {
2196       y[16*(*idx)]    += alpha1*(*v);
2197       y[16*(*idx)+1]  += alpha2*(*v);
2198       y[16*(*idx)+2]  += alpha3*(*v);
2199       y[16*(*idx)+3]  += alpha4*(*v);
2200       y[16*(*idx)+4]  += alpha5*(*v);
2201       y[16*(*idx)+5]  += alpha6*(*v);
2202       y[16*(*idx)+6]  += alpha7*(*v);
2203       y[16*(*idx)+7]  += alpha8*(*v);
2204       y[16*(*idx)+8]  += alpha9*(*v);
2205       y[16*(*idx)+9]  += alpha10*(*v);
2206       y[16*(*idx)+10] += alpha11*(*v);
2207       y[16*(*idx)+11] += alpha12*(*v);
2208       y[16*(*idx)+12] += alpha13*(*v);
2209       y[16*(*idx)+13] += alpha14*(*v);
2210       y[16*(*idx)+14] += alpha15*(*v);
2211       y[16*(*idx)+15] += alpha16*(*v);
2212       idx++; v++;
2213     }
2214   }
2215   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2216   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2217   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2222 {
2223   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2224   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2225   const PetscScalar *x,*v;
2226   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2227   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2228   PetscErrorCode    ierr;
2229   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2230   PetscInt          n,i,jrow,j;
2231 
2232   PetscFunctionBegin;
2233   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2234   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2235   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2236   idx  = a->j;
2237   v    = a->a;
2238   ii   = a->i;
2239 
2240   for (i=0; i<m; i++) {
2241     jrow  = ii[i];
2242     n     = ii[i+1] - jrow;
2243     sum1  = 0.0;
2244     sum2  = 0.0;
2245     sum3  = 0.0;
2246     sum4  = 0.0;
2247     sum5  = 0.0;
2248     sum6  = 0.0;
2249     sum7  = 0.0;
2250     sum8  = 0.0;
2251     sum9  = 0.0;
2252     sum10 = 0.0;
2253     sum11 = 0.0;
2254     sum12 = 0.0;
2255     sum13 = 0.0;
2256     sum14 = 0.0;
2257     sum15 = 0.0;
2258     sum16 = 0.0;
2259     for (j=0; j<n; j++) {
2260       sum1  += v[jrow]*x[16*idx[jrow]];
2261       sum2  += v[jrow]*x[16*idx[jrow]+1];
2262       sum3  += v[jrow]*x[16*idx[jrow]+2];
2263       sum4  += v[jrow]*x[16*idx[jrow]+3];
2264       sum5  += v[jrow]*x[16*idx[jrow]+4];
2265       sum6  += v[jrow]*x[16*idx[jrow]+5];
2266       sum7  += v[jrow]*x[16*idx[jrow]+6];
2267       sum8  += v[jrow]*x[16*idx[jrow]+7];
2268       sum9  += v[jrow]*x[16*idx[jrow]+8];
2269       sum10 += v[jrow]*x[16*idx[jrow]+9];
2270       sum11 += v[jrow]*x[16*idx[jrow]+10];
2271       sum12 += v[jrow]*x[16*idx[jrow]+11];
2272       sum13 += v[jrow]*x[16*idx[jrow]+12];
2273       sum14 += v[jrow]*x[16*idx[jrow]+13];
2274       sum15 += v[jrow]*x[16*idx[jrow]+14];
2275       sum16 += v[jrow]*x[16*idx[jrow]+15];
2276       jrow++;
2277     }
2278     y[16*i]    += sum1;
2279     y[16*i+1]  += sum2;
2280     y[16*i+2]  += sum3;
2281     y[16*i+3]  += sum4;
2282     y[16*i+4]  += sum5;
2283     y[16*i+5]  += sum6;
2284     y[16*i+6]  += sum7;
2285     y[16*i+7]  += sum8;
2286     y[16*i+8]  += sum9;
2287     y[16*i+9]  += sum10;
2288     y[16*i+10] += sum11;
2289     y[16*i+11] += sum12;
2290     y[16*i+12] += sum13;
2291     y[16*i+13] += sum14;
2292     y[16*i+14] += sum15;
2293     y[16*i+15] += sum16;
2294   }
2295 
2296   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2297   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2303 {
2304   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2305   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2306   const PetscScalar *x,*v;
2307   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2308   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2309   PetscErrorCode    ierr;
2310   const PetscInt    m = b->AIJ->rmap->n,*idx;
2311   PetscInt          n,i;
2312 
2313   PetscFunctionBegin;
2314   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2315   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2316   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2317   for (i=0; i<m; i++) {
2318     idx     = a->j + a->i[i];
2319     v       = a->a + a->i[i];
2320     n       = a->i[i+1] - a->i[i];
2321     alpha1  = x[16*i];
2322     alpha2  = x[16*i+1];
2323     alpha3  = x[16*i+2];
2324     alpha4  = x[16*i+3];
2325     alpha5  = x[16*i+4];
2326     alpha6  = x[16*i+5];
2327     alpha7  = x[16*i+6];
2328     alpha8  = x[16*i+7];
2329     alpha9  = x[16*i+8];
2330     alpha10 = x[16*i+9];
2331     alpha11 = x[16*i+10];
2332     alpha12 = x[16*i+11];
2333     alpha13 = x[16*i+12];
2334     alpha14 = x[16*i+13];
2335     alpha15 = x[16*i+14];
2336     alpha16 = x[16*i+15];
2337     while (n-->0) {
2338       y[16*(*idx)]    += alpha1*(*v);
2339       y[16*(*idx)+1]  += alpha2*(*v);
2340       y[16*(*idx)+2]  += alpha3*(*v);
2341       y[16*(*idx)+3]  += alpha4*(*v);
2342       y[16*(*idx)+4]  += alpha5*(*v);
2343       y[16*(*idx)+5]  += alpha6*(*v);
2344       y[16*(*idx)+6]  += alpha7*(*v);
2345       y[16*(*idx)+7]  += alpha8*(*v);
2346       y[16*(*idx)+8]  += alpha9*(*v);
2347       y[16*(*idx)+9]  += alpha10*(*v);
2348       y[16*(*idx)+10] += alpha11*(*v);
2349       y[16*(*idx)+11] += alpha12*(*v);
2350       y[16*(*idx)+12] += alpha13*(*v);
2351       y[16*(*idx)+13] += alpha14*(*v);
2352       y[16*(*idx)+14] += alpha15*(*v);
2353       y[16*(*idx)+15] += alpha16*(*v);
2354       idx++; v++;
2355     }
2356   }
2357   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2359   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 /*--------------------------------------------------------------------------------------------*/
2364 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2365 {
2366   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2367   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2368   const PetscScalar *x,*v;
2369   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2370   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2371   PetscErrorCode    ierr;
2372   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2373   PetscInt          nonzerorow=0,n,i,jrow,j;
2374 
2375   PetscFunctionBegin;
2376   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2377   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2378   idx  = a->j;
2379   v    = a->a;
2380   ii   = a->i;
2381 
2382   for (i=0; i<m; i++) {
2383     jrow  = ii[i];
2384     n     = ii[i+1] - jrow;
2385     sum1  = 0.0;
2386     sum2  = 0.0;
2387     sum3  = 0.0;
2388     sum4  = 0.0;
2389     sum5  = 0.0;
2390     sum6  = 0.0;
2391     sum7  = 0.0;
2392     sum8  = 0.0;
2393     sum9  = 0.0;
2394     sum10 = 0.0;
2395     sum11 = 0.0;
2396     sum12 = 0.0;
2397     sum13 = 0.0;
2398     sum14 = 0.0;
2399     sum15 = 0.0;
2400     sum16 = 0.0;
2401     sum17 = 0.0;
2402     sum18 = 0.0;
2403 
2404     nonzerorow += (n>0);
2405     for (j=0; j<n; j++) {
2406       sum1  += v[jrow]*x[18*idx[jrow]];
2407       sum2  += v[jrow]*x[18*idx[jrow]+1];
2408       sum3  += v[jrow]*x[18*idx[jrow]+2];
2409       sum4  += v[jrow]*x[18*idx[jrow]+3];
2410       sum5  += v[jrow]*x[18*idx[jrow]+4];
2411       sum6  += v[jrow]*x[18*idx[jrow]+5];
2412       sum7  += v[jrow]*x[18*idx[jrow]+6];
2413       sum8  += v[jrow]*x[18*idx[jrow]+7];
2414       sum9  += v[jrow]*x[18*idx[jrow]+8];
2415       sum10 += v[jrow]*x[18*idx[jrow]+9];
2416       sum11 += v[jrow]*x[18*idx[jrow]+10];
2417       sum12 += v[jrow]*x[18*idx[jrow]+11];
2418       sum13 += v[jrow]*x[18*idx[jrow]+12];
2419       sum14 += v[jrow]*x[18*idx[jrow]+13];
2420       sum15 += v[jrow]*x[18*idx[jrow]+14];
2421       sum16 += v[jrow]*x[18*idx[jrow]+15];
2422       sum17 += v[jrow]*x[18*idx[jrow]+16];
2423       sum18 += v[jrow]*x[18*idx[jrow]+17];
2424       jrow++;
2425     }
2426     y[18*i]    = sum1;
2427     y[18*i+1]  = sum2;
2428     y[18*i+2]  = sum3;
2429     y[18*i+3]  = sum4;
2430     y[18*i+4]  = sum5;
2431     y[18*i+5]  = sum6;
2432     y[18*i+6]  = sum7;
2433     y[18*i+7]  = sum8;
2434     y[18*i+8]  = sum9;
2435     y[18*i+9]  = sum10;
2436     y[18*i+10] = sum11;
2437     y[18*i+11] = sum12;
2438     y[18*i+12] = sum13;
2439     y[18*i+13] = sum14;
2440     y[18*i+14] = sum15;
2441     y[18*i+15] = sum16;
2442     y[18*i+16] = sum17;
2443     y[18*i+17] = sum18;
2444   }
2445 
2446   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2447   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2453 {
2454   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2455   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2456   const PetscScalar *x,*v;
2457   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2458   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2459   PetscErrorCode    ierr;
2460   const PetscInt    m = b->AIJ->rmap->n,*idx;
2461   PetscInt          n,i;
2462 
2463   PetscFunctionBegin;
2464   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2465   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2466   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2467 
2468   for (i=0; i<m; i++) {
2469     idx     = a->j + a->i[i];
2470     v       = a->a + a->i[i];
2471     n       = a->i[i+1] - a->i[i];
2472     alpha1  = x[18*i];
2473     alpha2  = x[18*i+1];
2474     alpha3  = x[18*i+2];
2475     alpha4  = x[18*i+3];
2476     alpha5  = x[18*i+4];
2477     alpha6  = x[18*i+5];
2478     alpha7  = x[18*i+6];
2479     alpha8  = x[18*i+7];
2480     alpha9  = x[18*i+8];
2481     alpha10 = x[18*i+9];
2482     alpha11 = x[18*i+10];
2483     alpha12 = x[18*i+11];
2484     alpha13 = x[18*i+12];
2485     alpha14 = x[18*i+13];
2486     alpha15 = x[18*i+14];
2487     alpha16 = x[18*i+15];
2488     alpha17 = x[18*i+16];
2489     alpha18 = x[18*i+17];
2490     while (n-->0) {
2491       y[18*(*idx)]    += alpha1*(*v);
2492       y[18*(*idx)+1]  += alpha2*(*v);
2493       y[18*(*idx)+2]  += alpha3*(*v);
2494       y[18*(*idx)+3]  += alpha4*(*v);
2495       y[18*(*idx)+4]  += alpha5*(*v);
2496       y[18*(*idx)+5]  += alpha6*(*v);
2497       y[18*(*idx)+6]  += alpha7*(*v);
2498       y[18*(*idx)+7]  += alpha8*(*v);
2499       y[18*(*idx)+8]  += alpha9*(*v);
2500       y[18*(*idx)+9]  += alpha10*(*v);
2501       y[18*(*idx)+10] += alpha11*(*v);
2502       y[18*(*idx)+11] += alpha12*(*v);
2503       y[18*(*idx)+12] += alpha13*(*v);
2504       y[18*(*idx)+13] += alpha14*(*v);
2505       y[18*(*idx)+14] += alpha15*(*v);
2506       y[18*(*idx)+15] += alpha16*(*v);
2507       y[18*(*idx)+16] += alpha17*(*v);
2508       y[18*(*idx)+17] += alpha18*(*v);
2509       idx++; v++;
2510     }
2511   }
2512   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2513   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2514   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2519 {
2520   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2521   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2522   const PetscScalar *x,*v;
2523   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2524   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2525   PetscErrorCode    ierr;
2526   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2527   PetscInt          n,i,jrow,j;
2528 
2529   PetscFunctionBegin;
2530   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2531   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2532   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2533   idx  = a->j;
2534   v    = a->a;
2535   ii   = a->i;
2536 
2537   for (i=0; i<m; i++) {
2538     jrow  = ii[i];
2539     n     = ii[i+1] - jrow;
2540     sum1  = 0.0;
2541     sum2  = 0.0;
2542     sum3  = 0.0;
2543     sum4  = 0.0;
2544     sum5  = 0.0;
2545     sum6  = 0.0;
2546     sum7  = 0.0;
2547     sum8  = 0.0;
2548     sum9  = 0.0;
2549     sum10 = 0.0;
2550     sum11 = 0.0;
2551     sum12 = 0.0;
2552     sum13 = 0.0;
2553     sum14 = 0.0;
2554     sum15 = 0.0;
2555     sum16 = 0.0;
2556     sum17 = 0.0;
2557     sum18 = 0.0;
2558     for (j=0; j<n; j++) {
2559       sum1  += v[jrow]*x[18*idx[jrow]];
2560       sum2  += v[jrow]*x[18*idx[jrow]+1];
2561       sum3  += v[jrow]*x[18*idx[jrow]+2];
2562       sum4  += v[jrow]*x[18*idx[jrow]+3];
2563       sum5  += v[jrow]*x[18*idx[jrow]+4];
2564       sum6  += v[jrow]*x[18*idx[jrow]+5];
2565       sum7  += v[jrow]*x[18*idx[jrow]+6];
2566       sum8  += v[jrow]*x[18*idx[jrow]+7];
2567       sum9  += v[jrow]*x[18*idx[jrow]+8];
2568       sum10 += v[jrow]*x[18*idx[jrow]+9];
2569       sum11 += v[jrow]*x[18*idx[jrow]+10];
2570       sum12 += v[jrow]*x[18*idx[jrow]+11];
2571       sum13 += v[jrow]*x[18*idx[jrow]+12];
2572       sum14 += v[jrow]*x[18*idx[jrow]+13];
2573       sum15 += v[jrow]*x[18*idx[jrow]+14];
2574       sum16 += v[jrow]*x[18*idx[jrow]+15];
2575       sum17 += v[jrow]*x[18*idx[jrow]+16];
2576       sum18 += v[jrow]*x[18*idx[jrow]+17];
2577       jrow++;
2578     }
2579     y[18*i]    += sum1;
2580     y[18*i+1]  += sum2;
2581     y[18*i+2]  += sum3;
2582     y[18*i+3]  += sum4;
2583     y[18*i+4]  += sum5;
2584     y[18*i+5]  += sum6;
2585     y[18*i+6]  += sum7;
2586     y[18*i+7]  += sum8;
2587     y[18*i+8]  += sum9;
2588     y[18*i+9]  += sum10;
2589     y[18*i+10] += sum11;
2590     y[18*i+11] += sum12;
2591     y[18*i+12] += sum13;
2592     y[18*i+13] += sum14;
2593     y[18*i+14] += sum15;
2594     y[18*i+15] += sum16;
2595     y[18*i+16] += sum17;
2596     y[18*i+17] += sum18;
2597   }
2598 
2599   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2600   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2601   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2606 {
2607   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2608   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2609   const PetscScalar *x,*v;
2610   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2611   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2612   PetscErrorCode    ierr;
2613   const PetscInt    m = b->AIJ->rmap->n,*idx;
2614   PetscInt          n,i;
2615 
2616   PetscFunctionBegin;
2617   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2618   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2619   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2620   for (i=0; i<m; i++) {
2621     idx     = a->j + a->i[i];
2622     v       = a->a + a->i[i];
2623     n       = a->i[i+1] - a->i[i];
2624     alpha1  = x[18*i];
2625     alpha2  = x[18*i+1];
2626     alpha3  = x[18*i+2];
2627     alpha4  = x[18*i+3];
2628     alpha5  = x[18*i+4];
2629     alpha6  = x[18*i+5];
2630     alpha7  = x[18*i+6];
2631     alpha8  = x[18*i+7];
2632     alpha9  = x[18*i+8];
2633     alpha10 = x[18*i+9];
2634     alpha11 = x[18*i+10];
2635     alpha12 = x[18*i+11];
2636     alpha13 = x[18*i+12];
2637     alpha14 = x[18*i+13];
2638     alpha15 = x[18*i+14];
2639     alpha16 = x[18*i+15];
2640     alpha17 = x[18*i+16];
2641     alpha18 = x[18*i+17];
2642     while (n-->0) {
2643       y[18*(*idx)]    += alpha1*(*v);
2644       y[18*(*idx)+1]  += alpha2*(*v);
2645       y[18*(*idx)+2]  += alpha3*(*v);
2646       y[18*(*idx)+3]  += alpha4*(*v);
2647       y[18*(*idx)+4]  += alpha5*(*v);
2648       y[18*(*idx)+5]  += alpha6*(*v);
2649       y[18*(*idx)+6]  += alpha7*(*v);
2650       y[18*(*idx)+7]  += alpha8*(*v);
2651       y[18*(*idx)+8]  += alpha9*(*v);
2652       y[18*(*idx)+9]  += alpha10*(*v);
2653       y[18*(*idx)+10] += alpha11*(*v);
2654       y[18*(*idx)+11] += alpha12*(*v);
2655       y[18*(*idx)+12] += alpha13*(*v);
2656       y[18*(*idx)+13] += alpha14*(*v);
2657       y[18*(*idx)+14] += alpha15*(*v);
2658       y[18*(*idx)+15] += alpha16*(*v);
2659       y[18*(*idx)+16] += alpha17*(*v);
2660       y[18*(*idx)+17] += alpha18*(*v);
2661       idx++; v++;
2662     }
2663   }
2664   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2665   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2666   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2671 {
2672   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2673   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2674   const PetscScalar *x,*v;
2675   PetscScalar       *y,*sums;
2676   PetscErrorCode    ierr;
2677   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2678   PetscInt          n,i,jrow,j,dof = b->dof,k;
2679 
2680   PetscFunctionBegin;
2681   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2682   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2683   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2684   idx  = a->j;
2685   v    = a->a;
2686   ii   = a->i;
2687 
2688   for (i=0; i<m; i++) {
2689     jrow = ii[i];
2690     n    = ii[i+1] - jrow;
2691     sums = y + dof*i;
2692     for (j=0; j<n; j++) {
2693       for (k=0; k<dof; k++) {
2694         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2695       }
2696       jrow++;
2697     }
2698   }
2699 
2700   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2701   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2702   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2707 {
2708   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2709   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2710   const PetscScalar *x,*v;
2711   PetscScalar       *y,*sums;
2712   PetscErrorCode    ierr;
2713   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2714   PetscInt          n,i,jrow,j,dof = b->dof,k;
2715 
2716   PetscFunctionBegin;
2717   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2718   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2719   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2720   idx  = a->j;
2721   v    = a->a;
2722   ii   = a->i;
2723 
2724   for (i=0; i<m; i++) {
2725     jrow = ii[i];
2726     n    = ii[i+1] - jrow;
2727     sums = y + dof*i;
2728     for (j=0; j<n; j++) {
2729       for (k=0; k<dof; k++) {
2730         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2731       }
2732       jrow++;
2733     }
2734   }
2735 
2736   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2737   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2738   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2743 {
2744   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2745   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2746   const PetscScalar *x,*v,*alpha;
2747   PetscScalar       *y;
2748   PetscErrorCode    ierr;
2749   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2750   PetscInt          n,i,k;
2751 
2752   PetscFunctionBegin;
2753   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2754   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2755   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2756   for (i=0; i<m; i++) {
2757     idx   = a->j + a->i[i];
2758     v     = a->a + a->i[i];
2759     n     = a->i[i+1] - a->i[i];
2760     alpha = x + dof*i;
2761     while (n-->0) {
2762       for (k=0; k<dof; k++) {
2763         y[dof*(*idx)+k] += alpha[k]*(*v);
2764       }
2765       idx++; v++;
2766     }
2767   }
2768   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2769   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2770   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2775 {
2776   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2777   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2778   const PetscScalar *x,*v,*alpha;
2779   PetscScalar       *y;
2780   PetscErrorCode    ierr;
2781   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2782   PetscInt          n,i,k;
2783 
2784   PetscFunctionBegin;
2785   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2786   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2787   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2788   for (i=0; i<m; i++) {
2789     idx   = a->j + a->i[i];
2790     v     = a->a + a->i[i];
2791     n     = a->i[i+1] - a->i[i];
2792     alpha = x + dof*i;
2793     while (n-->0) {
2794       for (k=0; k<dof; k++) {
2795         y[dof*(*idx)+k] += alpha[k]*(*v);
2796       }
2797       idx++; v++;
2798     }
2799   }
2800   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2801   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2802   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 /*===================================================================================*/
2807 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2808 {
2809   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   /* start the scatter */
2814   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2815   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2816   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2817   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2822 {
2823   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2824   PetscErrorCode ierr;
2825 
2826   PetscFunctionBegin;
2827   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2828   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2829   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2830   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2835 {
2836   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2837   PetscErrorCode ierr;
2838 
2839   PetscFunctionBegin;
2840   /* start the scatter */
2841   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2842   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2843   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2844   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2849 {
2850   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2851   PetscErrorCode ierr;
2852 
2853   PetscFunctionBegin;
2854   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2855   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2856   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2857   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 /* ----------------------------------------------------------------*/
2862 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2863 {
2864   PetscErrorCode     ierr;
2865   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2866   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2867   Mat                P         =pp->AIJ;
2868   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2869   PetscInt           *pti,*ptj,*ptJ;
2870   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2871   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2872   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2873   MatScalar          *ca;
2874   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2875 
2876   PetscFunctionBegin;
2877   /* Get ij structure of P^T */
2878   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2879 
2880   cn = pn*ppdof;
2881   /* Allocate ci array, arrays for fill computation and */
2882   /* free space for accumulating nonzero column info */
2883   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2884   ci[0] = 0;
2885 
2886   /* Work arrays for rows of P^T*A */
2887   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2888   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2889   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
2890 
2891   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2892   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2893   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2894   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2895   current_space = free_space;
2896 
2897   /* Determine symbolic info for each row of C: */
2898   for (i=0; i<pn; i++) {
2899     ptnzi = pti[i+1] - pti[i];
2900     ptJ   = ptj + pti[i];
2901     for (dof=0; dof<ppdof; dof++) {
2902       ptanzi = 0;
2903       /* Determine symbolic row of PtA: */
2904       for (j=0; j<ptnzi; j++) {
2905         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2906         arow = ptJ[j]*ppdof + dof;
2907         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2908         anzj = ai[arow+1] - ai[arow];
2909         ajj  = aj + ai[arow];
2910         for (k=0; k<anzj; k++) {
2911           if (!ptadenserow[ajj[k]]) {
2912             ptadenserow[ajj[k]]    = -1;
2913             ptasparserow[ptanzi++] = ajj[k];
2914           }
2915         }
2916       }
2917       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2918       ptaj = ptasparserow;
2919       cnzi = 0;
2920       for (j=0; j<ptanzi; j++) {
2921         /* Get offset within block of P */
2922         pshift = *ptaj%ppdof;
2923         /* Get block row of P */
2924         prow = (*ptaj++)/ppdof; /* integer division */
2925         /* P has same number of nonzeros per row as the compressed form */
2926         pnzj = pi[prow+1] - pi[prow];
2927         pjj  = pj + pi[prow];
2928         for (k=0;k<pnzj;k++) {
2929           /* Locations in C are shifted by the offset within the block */
2930           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2931           if (!denserow[pjj[k]*ppdof+pshift]) {
2932             denserow[pjj[k]*ppdof+pshift] = -1;
2933             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2934           }
2935         }
2936       }
2937 
2938       /* sort sparserow */
2939       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2940 
2941       /* If free space is not available, make more free space */
2942       /* Double the amount of total space in the list */
2943       if (current_space->local_remaining<cnzi) {
2944         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
2945       }
2946 
2947       /* Copy data into free space, and zero out denserows */
2948       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2949 
2950       current_space->array           += cnzi;
2951       current_space->local_used      += cnzi;
2952       current_space->local_remaining -= cnzi;
2953 
2954       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2955       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2956 
2957       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2958       /*        For now, we will recompute what is needed. */
2959       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2960     }
2961   }
2962   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2963   /* Allocate space for cj, initialize cj, and */
2964   /* destroy list of free space and other temporary array(s) */
2965   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
2966   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2967   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
2968 
2969   /* Allocate space for ca */
2970   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
2971 
2972   /* put together the new matrix */
2973   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2974   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
2975 
2976   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2977   /* Since these are PETSc arrays, change flags to free them as necessary. */
2978   c          = (Mat_SeqAIJ*)((*C)->data);
2979   c->free_a  = PETSC_TRUE;
2980   c->free_ij = PETSC_TRUE;
2981   c->nonew   = 0;
2982 
2983   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2984 
2985   /* Clean up. */
2986   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2991 {
2992   /* This routine requires testing -- first draft only */
2993   PetscErrorCode  ierr;
2994   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
2995   Mat             P  =pp->AIJ;
2996   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
2997   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
2998   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
2999   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3000   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3001   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3002   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3003   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3004   MatScalar       *ca=c->a,*caj,*apa;
3005 
3006   PetscFunctionBegin;
3007   /* Allocate temporary array for storage of one row of A*P */
3008   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3009 
3010   /* Clear old values in C */
3011   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3012 
3013   for (i=0; i<am; i++) {
3014     /* Form sparse row of A*P */
3015     anzi  = ai[i+1] - ai[i];
3016     apnzj = 0;
3017     for (j=0; j<anzi; j++) {
3018       /* Get offset within block of P */
3019       pshift = *aj%ppdof;
3020       /* Get block row of P */
3021       prow = *aj++/ppdof;   /* integer division */
3022       pnzj = pi[prow+1] - pi[prow];
3023       pjj  = pj + pi[prow];
3024       paj  = pa + pi[prow];
3025       for (k=0; k<pnzj; k++) {
3026         poffset = pjj[k]*ppdof+pshift;
3027         if (!apjdense[poffset]) {
3028           apjdense[poffset] = -1;
3029           apj[apnzj++]      = poffset;
3030         }
3031         apa[poffset] += (*aa)*paj[k];
3032       }
3033       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3034       aa++;
3035     }
3036 
3037     /* Sort the j index array for quick sparse axpy. */
3038     /* Note: a array does not need sorting as it is in dense storage locations. */
3039     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3040 
3041     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3042     prow    = i/ppdof; /* integer division */
3043     pshift  = i%ppdof;
3044     poffset = pi[prow];
3045     pnzi    = pi[prow+1] - poffset;
3046     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3047     pJ = pj+poffset;
3048     pA = pa+poffset;
3049     for (j=0; j<pnzi; j++) {
3050       crow = (*pJ)*ppdof+pshift;
3051       cjj  = cj + ci[crow];
3052       caj  = ca + ci[crow];
3053       pJ++;
3054       /* Perform sparse axpy operation.  Note cjj includes apj. */
3055       for (k=0,nextap=0; nextap<apnzj; k++) {
3056         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3057       }
3058       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3059       pA++;
3060     }
3061 
3062     /* Zero the current row info for A*P */
3063     for (j=0; j<apnzj; j++) {
3064       apa[apj[j]]      = 0.;
3065       apjdense[apj[j]] = 0;
3066     }
3067   }
3068 
3069   /* Assemble the final matrix and clean up */
3070   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3071   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3072   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3077 {
3078   PetscErrorCode ierr;
3079 
3080   PetscFunctionBegin;
3081   if (scall == MAT_INITIAL_MATRIX) {
3082     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3083     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3084     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3085   }
3086   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3087   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3088   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3093 {
3094   PetscErrorCode ierr;
3095 
3096   PetscFunctionBegin;
3097   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3098   ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr);
3099   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3100   PetscFunctionReturn(0);
3101 }
3102 
3103 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3104 {
3105   PetscFunctionBegin;
3106   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3107   PetscFunctionReturn(0);
3108 }
3109 
3110 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3111 {
3112   PetscErrorCode ierr;
3113 
3114   PetscFunctionBegin;
3115   if (scall == MAT_INITIAL_MATRIX) {
3116     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3117     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3118     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3119   }
3120   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3121   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3122   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3123   PetscFunctionReturn(0);
3124 }
3125 
3126 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3127 {
3128   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3129   Mat            a    = b->AIJ,B;
3130   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3131   PetscErrorCode ierr;
3132   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3133   PetscInt       *cols;
3134   PetscScalar    *vals;
3135 
3136   PetscFunctionBegin;
3137   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3138   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3139   for (i=0; i<m; i++) {
3140     nmax = PetscMax(nmax,aij->ilen[i]);
3141     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3142   }
3143   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3144   ierr = PetscFree(ilen);CHKERRQ(ierr);
3145   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3146   ii   = 0;
3147   for (i=0; i<m; i++) {
3148     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3149     for (j=0; j<dof; j++) {
3150       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3151       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3152       ii++;
3153     }
3154     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3155   }
3156   ierr = PetscFree(icols);CHKERRQ(ierr);
3157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159 
3160   if (reuse == MAT_INPLACE_MATRIX) {
3161     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3162   } else {
3163     *newmat = B;
3164   }
3165   PetscFunctionReturn(0);
3166 }
3167 
3168 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3169 
3170 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3171 {
3172   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3173   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3174   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3175   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3176   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3177   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3178   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3179   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3180   PetscInt       rstart,cstart,*garray,ii,k;
3181   PetscErrorCode ierr;
3182   PetscScalar    *vals,*ovals;
3183 
3184   PetscFunctionBegin;
3185   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3186   for (i=0; i<A->rmap->n/dof; i++) {
3187     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3188     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3189     for (j=0; j<dof; j++) {
3190       dnz[dof*i+j] = AIJ->ilen[i];
3191       onz[dof*i+j] = OAIJ->ilen[i];
3192     }
3193   }
3194   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3195   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3196   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3197 
3198   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3199   rstart = dof*maij->A->rmap->rstart;
3200   cstart = dof*maij->A->cmap->rstart;
3201   garray = mpiaij->garray;
3202 
3203   ii = rstart;
3204   for (i=0; i<A->rmap->n/dof; i++) {
3205     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3206     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3207     for (j=0; j<dof; j++) {
3208       for (k=0; k<ncols; k++) {
3209         icols[k] = cstart + dof*cols[k]+j;
3210       }
3211       for (k=0; k<oncols; k++) {
3212         oicols[k] = dof*garray[ocols[k]]+j;
3213       }
3214       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3215       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3216       ii++;
3217     }
3218     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3219     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3220   }
3221   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3222 
3223   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3224   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3225 
3226   if (reuse == MAT_INPLACE_MATRIX) {
3227     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3228     ((PetscObject)A)->refct = 1;
3229 
3230     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3231 
3232     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3233   } else {
3234     *newmat = B;
3235   }
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3240 {
3241   PetscErrorCode ierr;
3242   Mat            A;
3243 
3244   PetscFunctionBegin;
3245   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3246   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3247   ierr = MatDestroy(&A);CHKERRQ(ierr);
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 /* ---------------------------------------------------------------------------------- */
3252 /*@C
3253   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3254   operations for multicomponent problems.  It interpolates each component the same
3255   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3256   and MATMPIAIJ for distributed matrices.
3257 
3258   Collective
3259 
3260   Input Parameters:
3261 + A - the AIJ matrix describing the action on blocks
3262 - dof - the block size (number of components per node)
3263 
3264   Output Parameter:
3265 . maij - the new MAIJ matrix
3266 
3267   Operations provided:
3268 + MatMult
3269 . MatMultTranspose
3270 . MatMultAdd
3271 . MatMultTransposeAdd
3272 - MatView
3273 
3274   Level: advanced
3275 
3276 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3277 @*/
3278 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3279 {
3280   PetscErrorCode ierr;
3281   PetscMPIInt    size;
3282   PetscInt       n;
3283   Mat            B;
3284 
3285   PetscFunctionBegin;
3286   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3287 
3288   if (dof == 1) *maij = A;
3289   else {
3290     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3291     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3292     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3293     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3294     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3295     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3296 
3297     B->assembled = PETSC_TRUE;
3298 
3299     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3300     if (size == 1) {
3301       Mat_SeqMAIJ *b;
3302 
3303       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3304 
3305       B->ops->setup   = NULL;
3306       B->ops->destroy = MatDestroy_SeqMAIJ;
3307       B->ops->view    = MatView_SeqMAIJ;
3308       b               = (Mat_SeqMAIJ*)B->data;
3309       b->dof          = dof;
3310       b->AIJ          = A;
3311 
3312       if (dof == 2) {
3313         B->ops->mult             = MatMult_SeqMAIJ_2;
3314         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3315         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3316         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3317       } else if (dof == 3) {
3318         B->ops->mult             = MatMult_SeqMAIJ_3;
3319         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3320         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3321         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3322       } else if (dof == 4) {
3323         B->ops->mult             = MatMult_SeqMAIJ_4;
3324         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3325         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3326         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3327       } else if (dof == 5) {
3328         B->ops->mult             = MatMult_SeqMAIJ_5;
3329         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3330         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3331         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3332       } else if (dof == 6) {
3333         B->ops->mult             = MatMult_SeqMAIJ_6;
3334         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3335         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3336         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3337       } else if (dof == 7) {
3338         B->ops->mult             = MatMult_SeqMAIJ_7;
3339         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3340         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3341         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3342       } else if (dof == 8) {
3343         B->ops->mult             = MatMult_SeqMAIJ_8;
3344         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3345         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3346         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3347       } else if (dof == 9) {
3348         B->ops->mult             = MatMult_SeqMAIJ_9;
3349         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3350         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3351         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3352       } else if (dof == 10) {
3353         B->ops->mult             = MatMult_SeqMAIJ_10;
3354         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3355         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3356         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3357       } else if (dof == 11) {
3358         B->ops->mult             = MatMult_SeqMAIJ_11;
3359         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3360         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3361         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3362       } else if (dof == 16) {
3363         B->ops->mult             = MatMult_SeqMAIJ_16;
3364         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3365         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3366         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3367       } else if (dof == 18) {
3368         B->ops->mult             = MatMult_SeqMAIJ_18;
3369         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3370         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3371         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3372       } else {
3373         B->ops->mult             = MatMult_SeqMAIJ_N;
3374         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3375         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3376         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3377       }
3378       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3379       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3380     } else {
3381       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3382       Mat_MPIMAIJ *b;
3383       IS          from,to;
3384       Vec         gvec;
3385 
3386       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3387 
3388       B->ops->setup   = NULL;
3389       B->ops->destroy = MatDestroy_MPIMAIJ;
3390       B->ops->view    = MatView_MPIMAIJ;
3391 
3392       b      = (Mat_MPIMAIJ*)B->data;
3393       b->dof = dof;
3394       b->A   = A;
3395 
3396       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3397       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3398 
3399       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3400       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3401       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3402       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3403       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3404 
3405       /* create two temporary Index sets for build scatter gather */
3406       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3407       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3408 
3409       /* create temporary global vector to generate scatter context */
3410       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3411 
3412       /* generate the scatter context */
3413       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3414 
3415       ierr = ISDestroy(&from);CHKERRQ(ierr);
3416       ierr = ISDestroy(&to);CHKERRQ(ierr);
3417       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3418 
3419       B->ops->mult             = MatMult_MPIMAIJ_dof;
3420       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3421       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3422       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3423 
3424       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3425       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3426     }
3427     B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3428     ierr  = MatSetUp(B);CHKERRQ(ierr);
3429     *maij = B;
3430     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3431   }
3432   PetscFunctionReturn(0);
3433 }
3434