xref: /petsc/src/mat/impls/maij/maij.c (revision 5e71baeff2f3138f93cd4f5927dfd596eb8325cc)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
36 
37 .seealso: MatCreateMAIJ()
38 @*/
39 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
40 {
41   PetscErrorCode ierr;
42   PetscBool      ismpimaij,isseqmaij;
43 
44   PetscFunctionBegin;
45   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
47   if (ismpimaij) {
48     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49 
50     *B = b->A;
51   } else if (isseqmaij) {
52     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53 
54     *B = b->AIJ;
55   } else {
56     *B = A;
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 /*@
62    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63 
64    Logically Collective
65 
66    Input Parameter:
67 +  A - the MAIJ matrix
68 -  dof - the block size for the new matrix
69 
70    Output Parameter:
71 .  B - the new MAIJ matrix
72 
73    Level: advanced
74 
75 .seealso: MatCreateMAIJ()
76 @*/
77 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78 {
79   PetscErrorCode ierr;
80   Mat            Aij = NULL;
81 
82   PetscFunctionBegin;
83   PetscValidLogicalCollectiveInt(A,dof,2);
84   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
85   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
90 {
91   PetscErrorCode ierr;
92   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
93 
94   PetscFunctionBegin;
95   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
96   ierr = PetscFree(A->data);CHKERRQ(ierr);
97   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
99   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr);
100   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 PetscErrorCode MatSetUp_MAIJ(Mat A)
105 {
106   PetscFunctionBegin;
107   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
108   PetscFunctionReturn(0);
109 }
110 
111 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
112 {
113   PetscErrorCode ierr;
114   Mat            B;
115 
116   PetscFunctionBegin;
117   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
118   ierr = MatView(B,viewer);CHKERRQ(ierr);
119   ierr = MatDestroy(&B);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
124 {
125   PetscErrorCode ierr;
126   Mat            B;
127 
128   PetscFunctionBegin;
129   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
130   ierr = MatView(B,viewer);CHKERRQ(ierr);
131   ierr = MatDestroy(&B);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
136 {
137   PetscErrorCode ierr;
138   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
139 
140   PetscFunctionBegin;
141   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
142   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
143   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
144   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
145   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
146   ierr = PetscFree(A->data);CHKERRQ(ierr);
147   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
148   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
149   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 /*MC
154   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
155   multicomponent problems, interpolating or restricting each component the same way independently.
156   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
157 
158   Operations provided:
159 . MatMult
160 . MatMultTranspose
161 . MatMultAdd
162 . MatMultTransposeAdd
163 
164   Level: advanced
165 
166 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
167 M*/
168 
169 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
170 {
171   PetscErrorCode ierr;
172   Mat_MPIMAIJ    *b;
173   PetscMPIInt    size;
174 
175   PetscFunctionBegin;
176   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
177   A->data  = (void*)b;
178 
179   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
180 
181   A->ops->setup = MatSetUp_MAIJ;
182 
183   b->AIJ  = 0;
184   b->dof  = 0;
185   b->OAIJ = 0;
186   b->ctx  = 0;
187   b->w    = 0;
188   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
189   if (size == 1) {
190     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
191   } else {
192     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
193   }
194   A->preallocated  = PETSC_TRUE;
195   A->assembled     = PETSC_TRUE;
196   PetscFunctionReturn(0);
197 }
198 
199 /* --------------------------------------------------------------------------------------*/
200 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
201 {
202   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
203   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
204   const PetscScalar *x,*v;
205   PetscScalar       *y, sum1, sum2;
206   PetscErrorCode    ierr;
207   PetscInt          nonzerorow=0,n,i,jrow,j;
208   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
209 
210   PetscFunctionBegin;
211   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
212   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
213   idx  = a->j;
214   v    = a->a;
215   ii   = a->i;
216 
217   for (i=0; i<m; i++) {
218     jrow  = ii[i];
219     n     = ii[i+1] - jrow;
220     sum1  = 0.0;
221     sum2  = 0.0;
222 
223     nonzerorow += (n>0);
224     for (j=0; j<n; j++) {
225       sum1 += v[jrow]*x[2*idx[jrow]];
226       sum2 += v[jrow]*x[2*idx[jrow]+1];
227       jrow++;
228     }
229     y[2*i]   = sum1;
230     y[2*i+1] = sum2;
231   }
232 
233   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
234   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
240 {
241   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
242   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
243   const PetscScalar *x,*v;
244   PetscScalar       *y,alpha1,alpha2;
245   PetscErrorCode    ierr;
246   PetscInt          n,i;
247   const PetscInt    m = b->AIJ->rmap->n,*idx;
248 
249   PetscFunctionBegin;
250   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
251   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
252   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
253 
254   for (i=0; i<m; i++) {
255     idx    = a->j + a->i[i];
256     v      = a->a + a->i[i];
257     n      = a->i[i+1] - a->i[i];
258     alpha1 = x[2*i];
259     alpha2 = x[2*i+1];
260     while (n-->0) {
261       y[2*(*idx)]   += alpha1*(*v);
262       y[2*(*idx)+1] += alpha2*(*v);
263       idx++; v++;
264     }
265   }
266   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
267   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
268   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
269   PetscFunctionReturn(0);
270 }
271 
272 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
273 {
274   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
275   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
276   const PetscScalar *x,*v;
277   PetscScalar       *y,sum1, sum2;
278   PetscErrorCode    ierr;
279   PetscInt          n,i,jrow,j;
280   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
281 
282   PetscFunctionBegin;
283   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
284   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
285   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
286   idx  = a->j;
287   v    = a->a;
288   ii   = a->i;
289 
290   for (i=0; i<m; i++) {
291     jrow = ii[i];
292     n    = ii[i+1] - jrow;
293     sum1 = 0.0;
294     sum2 = 0.0;
295     for (j=0; j<n; j++) {
296       sum1 += v[jrow]*x[2*idx[jrow]];
297       sum2 += v[jrow]*x[2*idx[jrow]+1];
298       jrow++;
299     }
300     y[2*i]   += sum1;
301     y[2*i+1] += sum2;
302   }
303 
304   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
305   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
306   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
310 {
311   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
312   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
313   const PetscScalar *x,*v;
314   PetscScalar       *y,alpha1,alpha2;
315   PetscErrorCode    ierr;
316   PetscInt          n,i;
317   const PetscInt    m = b->AIJ->rmap->n,*idx;
318 
319   PetscFunctionBegin;
320   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
321   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
322   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
323 
324   for (i=0; i<m; i++) {
325     idx    = a->j + a->i[i];
326     v      = a->a + a->i[i];
327     n      = a->i[i+1] - a->i[i];
328     alpha1 = x[2*i];
329     alpha2 = x[2*i+1];
330     while (n-->0) {
331       y[2*(*idx)]   += alpha1*(*v);
332       y[2*(*idx)+1] += alpha2*(*v);
333       idx++; v++;
334     }
335   }
336   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
337   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
338   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 /* --------------------------------------------------------------------------------------*/
342 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343 {
344   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
345   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
346   const PetscScalar *x,*v;
347   PetscScalar       *y,sum1, sum2, sum3;
348   PetscErrorCode    ierr;
349   PetscInt          nonzerorow=0,n,i,jrow,j;
350   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
351 
352   PetscFunctionBegin;
353   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
354   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
355   idx  = a->j;
356   v    = a->a;
357   ii   = a->i;
358 
359   for (i=0; i<m; i++) {
360     jrow  = ii[i];
361     n     = ii[i+1] - jrow;
362     sum1  = 0.0;
363     sum2  = 0.0;
364     sum3  = 0.0;
365 
366     nonzerorow += (n>0);
367     for (j=0; j<n; j++) {
368       sum1 += v[jrow]*x[3*idx[jrow]];
369       sum2 += v[jrow]*x[3*idx[jrow]+1];
370       sum3 += v[jrow]*x[3*idx[jrow]+2];
371       jrow++;
372      }
373     y[3*i]   = sum1;
374     y[3*i+1] = sum2;
375     y[3*i+2] = sum3;
376   }
377 
378   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
379   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
380   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
381   PetscFunctionReturn(0);
382 }
383 
384 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
385 {
386   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
387   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
388   const PetscScalar *x,*v;
389   PetscScalar       *y,alpha1,alpha2,alpha3;
390   PetscErrorCode    ierr;
391   PetscInt          n,i;
392   const PetscInt    m = b->AIJ->rmap->n,*idx;
393 
394   PetscFunctionBegin;
395   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
396   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
397   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
398 
399   for (i=0; i<m; i++) {
400     idx    = a->j + a->i[i];
401     v      = a->a + a->i[i];
402     n      = a->i[i+1] - a->i[i];
403     alpha1 = x[3*i];
404     alpha2 = x[3*i+1];
405     alpha3 = x[3*i+2];
406     while (n-->0) {
407       y[3*(*idx)]   += alpha1*(*v);
408       y[3*(*idx)+1] += alpha2*(*v);
409       y[3*(*idx)+2] += alpha3*(*v);
410       idx++; v++;
411     }
412   }
413   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
414   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
415   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
420 {
421   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
422   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
423   const PetscScalar *x,*v;
424   PetscScalar       *y,sum1, sum2, sum3;
425   PetscErrorCode    ierr;
426   PetscInt          n,i,jrow,j;
427   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
428 
429   PetscFunctionBegin;
430   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
431   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
432   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
433   idx  = a->j;
434   v    = a->a;
435   ii   = a->i;
436 
437   for (i=0; i<m; i++) {
438     jrow = ii[i];
439     n    = ii[i+1] - jrow;
440     sum1 = 0.0;
441     sum2 = 0.0;
442     sum3 = 0.0;
443     for (j=0; j<n; j++) {
444       sum1 += v[jrow]*x[3*idx[jrow]];
445       sum2 += v[jrow]*x[3*idx[jrow]+1];
446       sum3 += v[jrow]*x[3*idx[jrow]+2];
447       jrow++;
448     }
449     y[3*i]   += sum1;
450     y[3*i+1] += sum2;
451     y[3*i+2] += sum3;
452   }
453 
454   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
455   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
456   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
462   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
463   const PetscScalar *x,*v;
464   PetscScalar       *y,alpha1,alpha2,alpha3;
465   PetscErrorCode    ierr;
466   PetscInt          n,i;
467   const PetscInt    m = b->AIJ->rmap->n,*idx;
468 
469   PetscFunctionBegin;
470   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
471   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
472   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
473   for (i=0; i<m; i++) {
474     idx    = a->j + a->i[i];
475     v      = a->a + a->i[i];
476     n      = a->i[i+1] - a->i[i];
477     alpha1 = x[3*i];
478     alpha2 = x[3*i+1];
479     alpha3 = x[3*i+2];
480     while (n-->0) {
481       y[3*(*idx)]   += alpha1*(*v);
482       y[3*(*idx)+1] += alpha2*(*v);
483       y[3*(*idx)+2] += alpha3*(*v);
484       idx++; v++;
485     }
486   }
487   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
488   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
489   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 /* ------------------------------------------------------------------------------*/
494 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
495 {
496   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
497   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
498   const PetscScalar *x,*v;
499   PetscScalar       *y,sum1, sum2, sum3, sum4;
500   PetscErrorCode    ierr;
501   PetscInt          nonzerorow=0,n,i,jrow,j;
502   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
503 
504   PetscFunctionBegin;
505   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
506   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
507   idx  = a->j;
508   v    = a->a;
509   ii   = a->i;
510 
511   for (i=0; i<m; i++) {
512     jrow        = ii[i];
513     n           = ii[i+1] - jrow;
514     sum1        = 0.0;
515     sum2        = 0.0;
516     sum3        = 0.0;
517     sum4        = 0.0;
518     nonzerorow += (n>0);
519     for (j=0; j<n; j++) {
520       sum1 += v[jrow]*x[4*idx[jrow]];
521       sum2 += v[jrow]*x[4*idx[jrow]+1];
522       sum3 += v[jrow]*x[4*idx[jrow]+2];
523       sum4 += v[jrow]*x[4*idx[jrow]+3];
524       jrow++;
525     }
526     y[4*i]   = sum1;
527     y[4*i+1] = sum2;
528     y[4*i+2] = sum3;
529     y[4*i+3] = sum4;
530   }
531 
532   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
533   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
534   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
535   PetscFunctionReturn(0);
536 }
537 
538 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
539 {
540   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
541   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
542   const PetscScalar *x,*v;
543   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
544   PetscErrorCode    ierr;
545   PetscInt          n,i;
546   const PetscInt    m = b->AIJ->rmap->n,*idx;
547 
548   PetscFunctionBegin;
549   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
550   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
551   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
552   for (i=0; i<m; i++) {
553     idx    = a->j + a->i[i];
554     v      = a->a + a->i[i];
555     n      = a->i[i+1] - a->i[i];
556     alpha1 = x[4*i];
557     alpha2 = x[4*i+1];
558     alpha3 = x[4*i+2];
559     alpha4 = x[4*i+3];
560     while (n-->0) {
561       y[4*(*idx)]   += alpha1*(*v);
562       y[4*(*idx)+1] += alpha2*(*v);
563       y[4*(*idx)+2] += alpha3*(*v);
564       y[4*(*idx)+3] += alpha4*(*v);
565       idx++; v++;
566     }
567   }
568   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
569   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
570   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
575 {
576   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
577   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
578   const PetscScalar *x,*v;
579   PetscScalar       *y,sum1, sum2, sum3, sum4;
580   PetscErrorCode    ierr;
581   PetscInt          n,i,jrow,j;
582   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
583 
584   PetscFunctionBegin;
585   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
586   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
587   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
588   idx  = a->j;
589   v    = a->a;
590   ii   = a->i;
591 
592   for (i=0; i<m; i++) {
593     jrow = ii[i];
594     n    = ii[i+1] - jrow;
595     sum1 = 0.0;
596     sum2 = 0.0;
597     sum3 = 0.0;
598     sum4 = 0.0;
599     for (j=0; j<n; j++) {
600       sum1 += v[jrow]*x[4*idx[jrow]];
601       sum2 += v[jrow]*x[4*idx[jrow]+1];
602       sum3 += v[jrow]*x[4*idx[jrow]+2];
603       sum4 += v[jrow]*x[4*idx[jrow]+3];
604       jrow++;
605     }
606     y[4*i]   += sum1;
607     y[4*i+1] += sum2;
608     y[4*i+2] += sum3;
609     y[4*i+3] += sum4;
610   }
611 
612   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
613   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
614   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
615   PetscFunctionReturn(0);
616 }
617 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
618 {
619   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
620   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
621   const PetscScalar *x,*v;
622   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
623   PetscErrorCode    ierr;
624   PetscInt          n,i;
625   const PetscInt    m = b->AIJ->rmap->n,*idx;
626 
627   PetscFunctionBegin;
628   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
629   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
630   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
631 
632   for (i=0; i<m; i++) {
633     idx    = a->j + a->i[i];
634     v      = a->a + a->i[i];
635     n      = a->i[i+1] - a->i[i];
636     alpha1 = x[4*i];
637     alpha2 = x[4*i+1];
638     alpha3 = x[4*i+2];
639     alpha4 = x[4*i+3];
640     while (n-->0) {
641       y[4*(*idx)]   += alpha1*(*v);
642       y[4*(*idx)+1] += alpha2*(*v);
643       y[4*(*idx)+2] += alpha3*(*v);
644       y[4*(*idx)+3] += alpha4*(*v);
645       idx++; v++;
646     }
647   }
648   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
649   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
650   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 /* ------------------------------------------------------------------------------*/
654 
655 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
656 {
657   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
658   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659   const PetscScalar *x,*v;
660   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
661   PetscErrorCode    ierr;
662   PetscInt          nonzerorow=0,n,i,jrow,j;
663   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
664 
665   PetscFunctionBegin;
666   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
667   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
668   idx  = a->j;
669   v    = a->a;
670   ii   = a->i;
671 
672   for (i=0; i<m; i++) {
673     jrow  = ii[i];
674     n     = ii[i+1] - jrow;
675     sum1  = 0.0;
676     sum2  = 0.0;
677     sum3  = 0.0;
678     sum4  = 0.0;
679     sum5  = 0.0;
680 
681     nonzerorow += (n>0);
682     for (j=0; j<n; j++) {
683       sum1 += v[jrow]*x[5*idx[jrow]];
684       sum2 += v[jrow]*x[5*idx[jrow]+1];
685       sum3 += v[jrow]*x[5*idx[jrow]+2];
686       sum4 += v[jrow]*x[5*idx[jrow]+3];
687       sum5 += v[jrow]*x[5*idx[jrow]+4];
688       jrow++;
689     }
690     y[5*i]   = sum1;
691     y[5*i+1] = sum2;
692     y[5*i+2] = sum3;
693     y[5*i+3] = sum4;
694     y[5*i+4] = sum5;
695   }
696 
697   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
698   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
699   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
704 {
705   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
706   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
707   const PetscScalar *x,*v;
708   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
709   PetscErrorCode    ierr;
710   PetscInt          n,i;
711   const PetscInt    m = b->AIJ->rmap->n,*idx;
712 
713   PetscFunctionBegin;
714   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
715   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
716   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
717 
718   for (i=0; i<m; i++) {
719     idx    = a->j + a->i[i];
720     v      = a->a + a->i[i];
721     n      = a->i[i+1] - a->i[i];
722     alpha1 = x[5*i];
723     alpha2 = x[5*i+1];
724     alpha3 = x[5*i+2];
725     alpha4 = x[5*i+3];
726     alpha5 = x[5*i+4];
727     while (n-->0) {
728       y[5*(*idx)]   += alpha1*(*v);
729       y[5*(*idx)+1] += alpha2*(*v);
730       y[5*(*idx)+2] += alpha3*(*v);
731       y[5*(*idx)+3] += alpha4*(*v);
732       y[5*(*idx)+4] += alpha5*(*v);
733       idx++; v++;
734     }
735   }
736   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
737   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
738   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
743 {
744   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
745   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
746   const PetscScalar *x,*v;
747   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
748   PetscErrorCode    ierr;
749   PetscInt          n,i,jrow,j;
750   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
751 
752   PetscFunctionBegin;
753   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
754   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
755   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
756   idx  = a->j;
757   v    = a->a;
758   ii   = a->i;
759 
760   for (i=0; i<m; i++) {
761     jrow = ii[i];
762     n    = ii[i+1] - jrow;
763     sum1 = 0.0;
764     sum2 = 0.0;
765     sum3 = 0.0;
766     sum4 = 0.0;
767     sum5 = 0.0;
768     for (j=0; j<n; j++) {
769       sum1 += v[jrow]*x[5*idx[jrow]];
770       sum2 += v[jrow]*x[5*idx[jrow]+1];
771       sum3 += v[jrow]*x[5*idx[jrow]+2];
772       sum4 += v[jrow]*x[5*idx[jrow]+3];
773       sum5 += v[jrow]*x[5*idx[jrow]+4];
774       jrow++;
775     }
776     y[5*i]   += sum1;
777     y[5*i+1] += sum2;
778     y[5*i+2] += sum3;
779     y[5*i+3] += sum4;
780     y[5*i+4] += sum5;
781   }
782 
783   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
784   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
785   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
790 {
791   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
792   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
793   const PetscScalar *x,*v;
794   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
795   PetscErrorCode    ierr;
796   PetscInt          n,i;
797   const PetscInt    m = b->AIJ->rmap->n,*idx;
798 
799   PetscFunctionBegin;
800   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
801   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
802   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
803 
804   for (i=0; i<m; i++) {
805     idx    = a->j + a->i[i];
806     v      = a->a + a->i[i];
807     n      = a->i[i+1] - a->i[i];
808     alpha1 = x[5*i];
809     alpha2 = x[5*i+1];
810     alpha3 = x[5*i+2];
811     alpha4 = x[5*i+3];
812     alpha5 = x[5*i+4];
813     while (n-->0) {
814       y[5*(*idx)]   += alpha1*(*v);
815       y[5*(*idx)+1] += alpha2*(*v);
816       y[5*(*idx)+2] += alpha3*(*v);
817       y[5*(*idx)+3] += alpha4*(*v);
818       y[5*(*idx)+4] += alpha5*(*v);
819       idx++; v++;
820     }
821   }
822   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
823   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
824   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
825   PetscFunctionReturn(0);
826 }
827 
828 /* ------------------------------------------------------------------------------*/
829 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
830 {
831   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
832   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
833   const PetscScalar *x,*v;
834   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
835   PetscErrorCode    ierr;
836   PetscInt          nonzerorow=0,n,i,jrow,j;
837   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
838 
839   PetscFunctionBegin;
840   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
841   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
842   idx  = a->j;
843   v    = a->a;
844   ii   = a->i;
845 
846   for (i=0; i<m; i++) {
847     jrow  = ii[i];
848     n     = ii[i+1] - jrow;
849     sum1  = 0.0;
850     sum2  = 0.0;
851     sum3  = 0.0;
852     sum4  = 0.0;
853     sum5  = 0.0;
854     sum6  = 0.0;
855 
856     nonzerorow += (n>0);
857     for (j=0; j<n; j++) {
858       sum1 += v[jrow]*x[6*idx[jrow]];
859       sum2 += v[jrow]*x[6*idx[jrow]+1];
860       sum3 += v[jrow]*x[6*idx[jrow]+2];
861       sum4 += v[jrow]*x[6*idx[jrow]+3];
862       sum5 += v[jrow]*x[6*idx[jrow]+4];
863       sum6 += v[jrow]*x[6*idx[jrow]+5];
864       jrow++;
865     }
866     y[6*i]   = sum1;
867     y[6*i+1] = sum2;
868     y[6*i+2] = sum3;
869     y[6*i+3] = sum4;
870     y[6*i+4] = sum5;
871     y[6*i+5] = sum6;
872   }
873 
874   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
875   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
876   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
881 {
882   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
883   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
884   const PetscScalar *x,*v;
885   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
886   PetscErrorCode    ierr;
887   PetscInt          n,i;
888   const PetscInt    m = b->AIJ->rmap->n,*idx;
889 
890   PetscFunctionBegin;
891   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
892   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
893   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
894 
895   for (i=0; i<m; i++) {
896     idx    = a->j + a->i[i];
897     v      = a->a + a->i[i];
898     n      = a->i[i+1] - a->i[i];
899     alpha1 = x[6*i];
900     alpha2 = x[6*i+1];
901     alpha3 = x[6*i+2];
902     alpha4 = x[6*i+3];
903     alpha5 = x[6*i+4];
904     alpha6 = x[6*i+5];
905     while (n-->0) {
906       y[6*(*idx)]   += alpha1*(*v);
907       y[6*(*idx)+1] += alpha2*(*v);
908       y[6*(*idx)+2] += alpha3*(*v);
909       y[6*(*idx)+3] += alpha4*(*v);
910       y[6*(*idx)+4] += alpha5*(*v);
911       y[6*(*idx)+5] += alpha6*(*v);
912       idx++; v++;
913     }
914   }
915   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
916   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
917   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
918   PetscFunctionReturn(0);
919 }
920 
921 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
922 {
923   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
924   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
925   const PetscScalar *x,*v;
926   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
927   PetscErrorCode    ierr;
928   PetscInt          n,i,jrow,j;
929   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
930 
931   PetscFunctionBegin;
932   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
933   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
934   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
935   idx  = a->j;
936   v    = a->a;
937   ii   = a->i;
938 
939   for (i=0; i<m; i++) {
940     jrow = ii[i];
941     n    = ii[i+1] - jrow;
942     sum1 = 0.0;
943     sum2 = 0.0;
944     sum3 = 0.0;
945     sum4 = 0.0;
946     sum5 = 0.0;
947     sum6 = 0.0;
948     for (j=0; j<n; j++) {
949       sum1 += v[jrow]*x[6*idx[jrow]];
950       sum2 += v[jrow]*x[6*idx[jrow]+1];
951       sum3 += v[jrow]*x[6*idx[jrow]+2];
952       sum4 += v[jrow]*x[6*idx[jrow]+3];
953       sum5 += v[jrow]*x[6*idx[jrow]+4];
954       sum6 += v[jrow]*x[6*idx[jrow]+5];
955       jrow++;
956     }
957     y[6*i]   += sum1;
958     y[6*i+1] += sum2;
959     y[6*i+2] += sum3;
960     y[6*i+3] += sum4;
961     y[6*i+4] += sum5;
962     y[6*i+5] += sum6;
963   }
964 
965   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
966   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
967   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
972 {
973   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
974   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
975   const PetscScalar *x,*v;
976   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
977   PetscErrorCode    ierr;
978   PetscInt          n,i;
979   const PetscInt    m = b->AIJ->rmap->n,*idx;
980 
981   PetscFunctionBegin;
982   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
983   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
984   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
985 
986   for (i=0; i<m; i++) {
987     idx    = a->j + a->i[i];
988     v      = a->a + a->i[i];
989     n      = a->i[i+1] - a->i[i];
990     alpha1 = x[6*i];
991     alpha2 = x[6*i+1];
992     alpha3 = x[6*i+2];
993     alpha4 = x[6*i+3];
994     alpha5 = x[6*i+4];
995     alpha6 = x[6*i+5];
996     while (n-->0) {
997       y[6*(*idx)]   += alpha1*(*v);
998       y[6*(*idx)+1] += alpha2*(*v);
999       y[6*(*idx)+2] += alpha3*(*v);
1000       y[6*(*idx)+3] += alpha4*(*v);
1001       y[6*(*idx)+4] += alpha5*(*v);
1002       y[6*(*idx)+5] += alpha6*(*v);
1003       idx++; v++;
1004     }
1005   }
1006   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1007   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1008   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 /* ------------------------------------------------------------------------------*/
1013 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1016   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1017   const PetscScalar *x,*v;
1018   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1019   PetscErrorCode    ierr;
1020   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1021   PetscInt          nonzerorow=0,n,i,jrow,j;
1022 
1023   PetscFunctionBegin;
1024   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1025   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1026   idx  = a->j;
1027   v    = a->a;
1028   ii   = a->i;
1029 
1030   for (i=0; i<m; i++) {
1031     jrow  = ii[i];
1032     n     = ii[i+1] - jrow;
1033     sum1  = 0.0;
1034     sum2  = 0.0;
1035     sum3  = 0.0;
1036     sum4  = 0.0;
1037     sum5  = 0.0;
1038     sum6  = 0.0;
1039     sum7  = 0.0;
1040 
1041     nonzerorow += (n>0);
1042     for (j=0; j<n; j++) {
1043       sum1 += v[jrow]*x[7*idx[jrow]];
1044       sum2 += v[jrow]*x[7*idx[jrow]+1];
1045       sum3 += v[jrow]*x[7*idx[jrow]+2];
1046       sum4 += v[jrow]*x[7*idx[jrow]+3];
1047       sum5 += v[jrow]*x[7*idx[jrow]+4];
1048       sum6 += v[jrow]*x[7*idx[jrow]+5];
1049       sum7 += v[jrow]*x[7*idx[jrow]+6];
1050       jrow++;
1051     }
1052     y[7*i]   = sum1;
1053     y[7*i+1] = sum2;
1054     y[7*i+2] = sum3;
1055     y[7*i+3] = sum4;
1056     y[7*i+4] = sum5;
1057     y[7*i+5] = sum6;
1058     y[7*i+6] = sum7;
1059   }
1060 
1061   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1062   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1063   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1070   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1071   const PetscScalar *x,*v;
1072   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1073   PetscErrorCode    ierr;
1074   const PetscInt    m = b->AIJ->rmap->n,*idx;
1075   PetscInt          n,i;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1079   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1080   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1081 
1082   for (i=0; i<m; i++) {
1083     idx    = a->j + a->i[i];
1084     v      = a->a + a->i[i];
1085     n      = a->i[i+1] - a->i[i];
1086     alpha1 = x[7*i];
1087     alpha2 = x[7*i+1];
1088     alpha3 = x[7*i+2];
1089     alpha4 = x[7*i+3];
1090     alpha5 = x[7*i+4];
1091     alpha6 = x[7*i+5];
1092     alpha7 = x[7*i+6];
1093     while (n-->0) {
1094       y[7*(*idx)]   += alpha1*(*v);
1095       y[7*(*idx)+1] += alpha2*(*v);
1096       y[7*(*idx)+2] += alpha3*(*v);
1097       y[7*(*idx)+3] += alpha4*(*v);
1098       y[7*(*idx)+4] += alpha5*(*v);
1099       y[7*(*idx)+5] += alpha6*(*v);
1100       y[7*(*idx)+6] += alpha7*(*v);
1101       idx++; v++;
1102     }
1103   }
1104   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1105   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1106   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1113   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1114   const PetscScalar *x,*v;
1115   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1116   PetscErrorCode    ierr;
1117   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1118   PetscInt          n,i,jrow,j;
1119 
1120   PetscFunctionBegin;
1121   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1122   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1123   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1124   idx  = a->j;
1125   v    = a->a;
1126   ii   = a->i;
1127 
1128   for (i=0; i<m; i++) {
1129     jrow = ii[i];
1130     n    = ii[i+1] - jrow;
1131     sum1 = 0.0;
1132     sum2 = 0.0;
1133     sum3 = 0.0;
1134     sum4 = 0.0;
1135     sum5 = 0.0;
1136     sum6 = 0.0;
1137     sum7 = 0.0;
1138     for (j=0; j<n; j++) {
1139       sum1 += v[jrow]*x[7*idx[jrow]];
1140       sum2 += v[jrow]*x[7*idx[jrow]+1];
1141       sum3 += v[jrow]*x[7*idx[jrow]+2];
1142       sum4 += v[jrow]*x[7*idx[jrow]+3];
1143       sum5 += v[jrow]*x[7*idx[jrow]+4];
1144       sum6 += v[jrow]*x[7*idx[jrow]+5];
1145       sum7 += v[jrow]*x[7*idx[jrow]+6];
1146       jrow++;
1147     }
1148     y[7*i]   += sum1;
1149     y[7*i+1] += sum2;
1150     y[7*i+2] += sum3;
1151     y[7*i+3] += sum4;
1152     y[7*i+4] += sum5;
1153     y[7*i+5] += sum6;
1154     y[7*i+6] += sum7;
1155   }
1156 
1157   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1158   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1159   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1164 {
1165   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1166   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1167   const PetscScalar *x,*v;
1168   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1169   PetscErrorCode    ierr;
1170   const PetscInt    m = b->AIJ->rmap->n,*idx;
1171   PetscInt          n,i;
1172 
1173   PetscFunctionBegin;
1174   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1175   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1176   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1177   for (i=0; i<m; i++) {
1178     idx    = a->j + a->i[i];
1179     v      = a->a + a->i[i];
1180     n      = a->i[i+1] - a->i[i];
1181     alpha1 = x[7*i];
1182     alpha2 = x[7*i+1];
1183     alpha3 = x[7*i+2];
1184     alpha4 = x[7*i+3];
1185     alpha5 = x[7*i+4];
1186     alpha6 = x[7*i+5];
1187     alpha7 = x[7*i+6];
1188     while (n-->0) {
1189       y[7*(*idx)]   += alpha1*(*v);
1190       y[7*(*idx)+1] += alpha2*(*v);
1191       y[7*(*idx)+2] += alpha3*(*v);
1192       y[7*(*idx)+3] += alpha4*(*v);
1193       y[7*(*idx)+4] += alpha5*(*v);
1194       y[7*(*idx)+5] += alpha6*(*v);
1195       y[7*(*idx)+6] += alpha7*(*v);
1196       idx++; v++;
1197     }
1198   }
1199   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1200   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1201   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1206 {
1207   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1208   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1209   const PetscScalar *x,*v;
1210   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1211   PetscErrorCode    ierr;
1212   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1213   PetscInt          nonzerorow=0,n,i,jrow,j;
1214 
1215   PetscFunctionBegin;
1216   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1217   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1218   idx  = a->j;
1219   v    = a->a;
1220   ii   = a->i;
1221 
1222   for (i=0; i<m; i++) {
1223     jrow  = ii[i];
1224     n     = ii[i+1] - jrow;
1225     sum1  = 0.0;
1226     sum2  = 0.0;
1227     sum3  = 0.0;
1228     sum4  = 0.0;
1229     sum5  = 0.0;
1230     sum6  = 0.0;
1231     sum7  = 0.0;
1232     sum8  = 0.0;
1233 
1234     nonzerorow += (n>0);
1235     for (j=0; j<n; j++) {
1236       sum1 += v[jrow]*x[8*idx[jrow]];
1237       sum2 += v[jrow]*x[8*idx[jrow]+1];
1238       sum3 += v[jrow]*x[8*idx[jrow]+2];
1239       sum4 += v[jrow]*x[8*idx[jrow]+3];
1240       sum5 += v[jrow]*x[8*idx[jrow]+4];
1241       sum6 += v[jrow]*x[8*idx[jrow]+5];
1242       sum7 += v[jrow]*x[8*idx[jrow]+6];
1243       sum8 += v[jrow]*x[8*idx[jrow]+7];
1244       jrow++;
1245     }
1246     y[8*i]   = sum1;
1247     y[8*i+1] = sum2;
1248     y[8*i+2] = sum3;
1249     y[8*i+3] = sum4;
1250     y[8*i+4] = sum5;
1251     y[8*i+5] = sum6;
1252     y[8*i+6] = sum7;
1253     y[8*i+7] = sum8;
1254   }
1255 
1256   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1257   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1258   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1263 {
1264   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1265   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1266   const PetscScalar *x,*v;
1267   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1268   PetscErrorCode    ierr;
1269   const PetscInt    m = b->AIJ->rmap->n,*idx;
1270   PetscInt          n,i;
1271 
1272   PetscFunctionBegin;
1273   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1274   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1275   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1276 
1277   for (i=0; i<m; i++) {
1278     idx    = a->j + a->i[i];
1279     v      = a->a + a->i[i];
1280     n      = a->i[i+1] - a->i[i];
1281     alpha1 = x[8*i];
1282     alpha2 = x[8*i+1];
1283     alpha3 = x[8*i+2];
1284     alpha4 = x[8*i+3];
1285     alpha5 = x[8*i+4];
1286     alpha6 = x[8*i+5];
1287     alpha7 = x[8*i+6];
1288     alpha8 = x[8*i+7];
1289     while (n-->0) {
1290       y[8*(*idx)]   += alpha1*(*v);
1291       y[8*(*idx)+1] += alpha2*(*v);
1292       y[8*(*idx)+2] += alpha3*(*v);
1293       y[8*(*idx)+3] += alpha4*(*v);
1294       y[8*(*idx)+4] += alpha5*(*v);
1295       y[8*(*idx)+5] += alpha6*(*v);
1296       y[8*(*idx)+6] += alpha7*(*v);
1297       y[8*(*idx)+7] += alpha8*(*v);
1298       idx++; v++;
1299     }
1300   }
1301   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1302   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1303   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1308 {
1309   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1310   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1311   const PetscScalar *x,*v;
1312   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1313   PetscErrorCode    ierr;
1314   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1315   PetscInt          n,i,jrow,j;
1316 
1317   PetscFunctionBegin;
1318   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1319   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1320   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1321   idx  = a->j;
1322   v    = a->a;
1323   ii   = a->i;
1324 
1325   for (i=0; i<m; i++) {
1326     jrow = ii[i];
1327     n    = ii[i+1] - jrow;
1328     sum1 = 0.0;
1329     sum2 = 0.0;
1330     sum3 = 0.0;
1331     sum4 = 0.0;
1332     sum5 = 0.0;
1333     sum6 = 0.0;
1334     sum7 = 0.0;
1335     sum8 = 0.0;
1336     for (j=0; j<n; j++) {
1337       sum1 += v[jrow]*x[8*idx[jrow]];
1338       sum2 += v[jrow]*x[8*idx[jrow]+1];
1339       sum3 += v[jrow]*x[8*idx[jrow]+2];
1340       sum4 += v[jrow]*x[8*idx[jrow]+3];
1341       sum5 += v[jrow]*x[8*idx[jrow]+4];
1342       sum6 += v[jrow]*x[8*idx[jrow]+5];
1343       sum7 += v[jrow]*x[8*idx[jrow]+6];
1344       sum8 += v[jrow]*x[8*idx[jrow]+7];
1345       jrow++;
1346     }
1347     y[8*i]   += sum1;
1348     y[8*i+1] += sum2;
1349     y[8*i+2] += sum3;
1350     y[8*i+3] += sum4;
1351     y[8*i+4] += sum5;
1352     y[8*i+5] += sum6;
1353     y[8*i+6] += sum7;
1354     y[8*i+7] += sum8;
1355   }
1356 
1357   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1359   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1364 {
1365   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1366   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1367   const PetscScalar *x,*v;
1368   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1369   PetscErrorCode    ierr;
1370   const PetscInt    m = b->AIJ->rmap->n,*idx;
1371   PetscInt          n,i;
1372 
1373   PetscFunctionBegin;
1374   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1375   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1376   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1377   for (i=0; i<m; i++) {
1378     idx    = a->j + a->i[i];
1379     v      = a->a + a->i[i];
1380     n      = a->i[i+1] - a->i[i];
1381     alpha1 = x[8*i];
1382     alpha2 = x[8*i+1];
1383     alpha3 = x[8*i+2];
1384     alpha4 = x[8*i+3];
1385     alpha5 = x[8*i+4];
1386     alpha6 = x[8*i+5];
1387     alpha7 = x[8*i+6];
1388     alpha8 = x[8*i+7];
1389     while (n-->0) {
1390       y[8*(*idx)]   += alpha1*(*v);
1391       y[8*(*idx)+1] += alpha2*(*v);
1392       y[8*(*idx)+2] += alpha3*(*v);
1393       y[8*(*idx)+3] += alpha4*(*v);
1394       y[8*(*idx)+4] += alpha5*(*v);
1395       y[8*(*idx)+5] += alpha6*(*v);
1396       y[8*(*idx)+6] += alpha7*(*v);
1397       y[8*(*idx)+7] += alpha8*(*v);
1398       idx++; v++;
1399     }
1400   }
1401   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1402   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1403   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 /* ------------------------------------------------------------------------------*/
1408 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1409 {
1410   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1411   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1412   const PetscScalar *x,*v;
1413   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1414   PetscErrorCode    ierr;
1415   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1416   PetscInt          nonzerorow=0,n,i,jrow,j;
1417 
1418   PetscFunctionBegin;
1419   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1420   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1421   idx  = a->j;
1422   v    = a->a;
1423   ii   = a->i;
1424 
1425   for (i=0; i<m; i++) {
1426     jrow  = ii[i];
1427     n     = ii[i+1] - jrow;
1428     sum1  = 0.0;
1429     sum2  = 0.0;
1430     sum3  = 0.0;
1431     sum4  = 0.0;
1432     sum5  = 0.0;
1433     sum6  = 0.0;
1434     sum7  = 0.0;
1435     sum8  = 0.0;
1436     sum9  = 0.0;
1437 
1438     nonzerorow += (n>0);
1439     for (j=0; j<n; j++) {
1440       sum1 += v[jrow]*x[9*idx[jrow]];
1441       sum2 += v[jrow]*x[9*idx[jrow]+1];
1442       sum3 += v[jrow]*x[9*idx[jrow]+2];
1443       sum4 += v[jrow]*x[9*idx[jrow]+3];
1444       sum5 += v[jrow]*x[9*idx[jrow]+4];
1445       sum6 += v[jrow]*x[9*idx[jrow]+5];
1446       sum7 += v[jrow]*x[9*idx[jrow]+6];
1447       sum8 += v[jrow]*x[9*idx[jrow]+7];
1448       sum9 += v[jrow]*x[9*idx[jrow]+8];
1449       jrow++;
1450     }
1451     y[9*i]   = sum1;
1452     y[9*i+1] = sum2;
1453     y[9*i+2] = sum3;
1454     y[9*i+3] = sum4;
1455     y[9*i+4] = sum5;
1456     y[9*i+5] = sum6;
1457     y[9*i+6] = sum7;
1458     y[9*i+7] = sum8;
1459     y[9*i+8] = sum9;
1460   }
1461 
1462   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1463   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1464   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 /* ------------------------------------------------------------------------------*/
1469 
1470 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1471 {
1472   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1473   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1474   const PetscScalar *x,*v;
1475   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1476   PetscErrorCode    ierr;
1477   const PetscInt    m = b->AIJ->rmap->n,*idx;
1478   PetscInt          n,i;
1479 
1480   PetscFunctionBegin;
1481   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1482   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1483   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1484 
1485   for (i=0; i<m; i++) {
1486     idx    = a->j + a->i[i];
1487     v      = a->a + a->i[i];
1488     n      = a->i[i+1] - a->i[i];
1489     alpha1 = x[9*i];
1490     alpha2 = x[9*i+1];
1491     alpha3 = x[9*i+2];
1492     alpha4 = x[9*i+3];
1493     alpha5 = x[9*i+4];
1494     alpha6 = x[9*i+5];
1495     alpha7 = x[9*i+6];
1496     alpha8 = x[9*i+7];
1497     alpha9 = x[9*i+8];
1498     while (n-->0) {
1499       y[9*(*idx)]   += alpha1*(*v);
1500       y[9*(*idx)+1] += alpha2*(*v);
1501       y[9*(*idx)+2] += alpha3*(*v);
1502       y[9*(*idx)+3] += alpha4*(*v);
1503       y[9*(*idx)+4] += alpha5*(*v);
1504       y[9*(*idx)+5] += alpha6*(*v);
1505       y[9*(*idx)+6] += alpha7*(*v);
1506       y[9*(*idx)+7] += alpha8*(*v);
1507       y[9*(*idx)+8] += alpha9*(*v);
1508       idx++; v++;
1509     }
1510   }
1511   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1512   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1513   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1518 {
1519   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1520   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1521   const PetscScalar *x,*v;
1522   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1523   PetscErrorCode    ierr;
1524   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1525   PetscInt          n,i,jrow,j;
1526 
1527   PetscFunctionBegin;
1528   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1529   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1530   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1531   idx  = a->j;
1532   v    = a->a;
1533   ii   = a->i;
1534 
1535   for (i=0; i<m; i++) {
1536     jrow = ii[i];
1537     n    = ii[i+1] - jrow;
1538     sum1 = 0.0;
1539     sum2 = 0.0;
1540     sum3 = 0.0;
1541     sum4 = 0.0;
1542     sum5 = 0.0;
1543     sum6 = 0.0;
1544     sum7 = 0.0;
1545     sum8 = 0.0;
1546     sum9 = 0.0;
1547     for (j=0; j<n; j++) {
1548       sum1 += v[jrow]*x[9*idx[jrow]];
1549       sum2 += v[jrow]*x[9*idx[jrow]+1];
1550       sum3 += v[jrow]*x[9*idx[jrow]+2];
1551       sum4 += v[jrow]*x[9*idx[jrow]+3];
1552       sum5 += v[jrow]*x[9*idx[jrow]+4];
1553       sum6 += v[jrow]*x[9*idx[jrow]+5];
1554       sum7 += v[jrow]*x[9*idx[jrow]+6];
1555       sum8 += v[jrow]*x[9*idx[jrow]+7];
1556       sum9 += v[jrow]*x[9*idx[jrow]+8];
1557       jrow++;
1558     }
1559     y[9*i]   += sum1;
1560     y[9*i+1] += sum2;
1561     y[9*i+2] += sum3;
1562     y[9*i+3] += sum4;
1563     y[9*i+4] += sum5;
1564     y[9*i+5] += sum6;
1565     y[9*i+6] += sum7;
1566     y[9*i+7] += sum8;
1567     y[9*i+8] += sum9;
1568   }
1569 
1570   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1571   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1572   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1577 {
1578   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1579   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1580   const PetscScalar *x,*v;
1581   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1582   PetscErrorCode    ierr;
1583   const PetscInt    m = b->AIJ->rmap->n,*idx;
1584   PetscInt          n,i;
1585 
1586   PetscFunctionBegin;
1587   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1588   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1589   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1590   for (i=0; i<m; i++) {
1591     idx    = a->j + a->i[i];
1592     v      = a->a + a->i[i];
1593     n      = a->i[i+1] - a->i[i];
1594     alpha1 = x[9*i];
1595     alpha2 = x[9*i+1];
1596     alpha3 = x[9*i+2];
1597     alpha4 = x[9*i+3];
1598     alpha5 = x[9*i+4];
1599     alpha6 = x[9*i+5];
1600     alpha7 = x[9*i+6];
1601     alpha8 = x[9*i+7];
1602     alpha9 = x[9*i+8];
1603     while (n-->0) {
1604       y[9*(*idx)]   += alpha1*(*v);
1605       y[9*(*idx)+1] += alpha2*(*v);
1606       y[9*(*idx)+2] += alpha3*(*v);
1607       y[9*(*idx)+3] += alpha4*(*v);
1608       y[9*(*idx)+4] += alpha5*(*v);
1609       y[9*(*idx)+5] += alpha6*(*v);
1610       y[9*(*idx)+6] += alpha7*(*v);
1611       y[9*(*idx)+7] += alpha8*(*v);
1612       y[9*(*idx)+8] += alpha9*(*v);
1613       idx++; v++;
1614     }
1615   }
1616   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1617   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1618   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1622 {
1623   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1624   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1625   const PetscScalar *x,*v;
1626   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1627   PetscErrorCode    ierr;
1628   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1629   PetscInt          nonzerorow=0,n,i,jrow,j;
1630 
1631   PetscFunctionBegin;
1632   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1633   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1634   idx  = a->j;
1635   v    = a->a;
1636   ii   = a->i;
1637 
1638   for (i=0; i<m; i++) {
1639     jrow  = ii[i];
1640     n     = ii[i+1] - jrow;
1641     sum1  = 0.0;
1642     sum2  = 0.0;
1643     sum3  = 0.0;
1644     sum4  = 0.0;
1645     sum5  = 0.0;
1646     sum6  = 0.0;
1647     sum7  = 0.0;
1648     sum8  = 0.0;
1649     sum9  = 0.0;
1650     sum10 = 0.0;
1651 
1652     nonzerorow += (n>0);
1653     for (j=0; j<n; j++) {
1654       sum1  += v[jrow]*x[10*idx[jrow]];
1655       sum2  += v[jrow]*x[10*idx[jrow]+1];
1656       sum3  += v[jrow]*x[10*idx[jrow]+2];
1657       sum4  += v[jrow]*x[10*idx[jrow]+3];
1658       sum5  += v[jrow]*x[10*idx[jrow]+4];
1659       sum6  += v[jrow]*x[10*idx[jrow]+5];
1660       sum7  += v[jrow]*x[10*idx[jrow]+6];
1661       sum8  += v[jrow]*x[10*idx[jrow]+7];
1662       sum9  += v[jrow]*x[10*idx[jrow]+8];
1663       sum10 += v[jrow]*x[10*idx[jrow]+9];
1664       jrow++;
1665     }
1666     y[10*i]   = sum1;
1667     y[10*i+1] = sum2;
1668     y[10*i+2] = sum3;
1669     y[10*i+3] = sum4;
1670     y[10*i+4] = sum5;
1671     y[10*i+5] = sum6;
1672     y[10*i+6] = sum7;
1673     y[10*i+7] = sum8;
1674     y[10*i+8] = sum9;
1675     y[10*i+9] = sum10;
1676   }
1677 
1678   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1679   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1680   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1685 {
1686   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1687   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1688   const PetscScalar *x,*v;
1689   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690   PetscErrorCode    ierr;
1691   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1692   PetscInt          n,i,jrow,j;
1693 
1694   PetscFunctionBegin;
1695   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1696   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1697   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1698   idx  = a->j;
1699   v    = a->a;
1700   ii   = a->i;
1701 
1702   for (i=0; i<m; i++) {
1703     jrow  = ii[i];
1704     n     = ii[i+1] - jrow;
1705     sum1  = 0.0;
1706     sum2  = 0.0;
1707     sum3  = 0.0;
1708     sum4  = 0.0;
1709     sum5  = 0.0;
1710     sum6  = 0.0;
1711     sum7  = 0.0;
1712     sum8  = 0.0;
1713     sum9  = 0.0;
1714     sum10 = 0.0;
1715     for (j=0; j<n; j++) {
1716       sum1  += v[jrow]*x[10*idx[jrow]];
1717       sum2  += v[jrow]*x[10*idx[jrow]+1];
1718       sum3  += v[jrow]*x[10*idx[jrow]+2];
1719       sum4  += v[jrow]*x[10*idx[jrow]+3];
1720       sum5  += v[jrow]*x[10*idx[jrow]+4];
1721       sum6  += v[jrow]*x[10*idx[jrow]+5];
1722       sum7  += v[jrow]*x[10*idx[jrow]+6];
1723       sum8  += v[jrow]*x[10*idx[jrow]+7];
1724       sum9  += v[jrow]*x[10*idx[jrow]+8];
1725       sum10 += v[jrow]*x[10*idx[jrow]+9];
1726       jrow++;
1727     }
1728     y[10*i]   += sum1;
1729     y[10*i+1] += sum2;
1730     y[10*i+2] += sum3;
1731     y[10*i+3] += sum4;
1732     y[10*i+4] += sum5;
1733     y[10*i+5] += sum6;
1734     y[10*i+6] += sum7;
1735     y[10*i+7] += sum8;
1736     y[10*i+8] += sum9;
1737     y[10*i+9] += sum10;
1738   }
1739 
1740   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1741   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1742   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1747 {
1748   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1749   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1750   const PetscScalar *x,*v;
1751   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1752   PetscErrorCode    ierr;
1753   const PetscInt    m = b->AIJ->rmap->n,*idx;
1754   PetscInt          n,i;
1755 
1756   PetscFunctionBegin;
1757   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1758   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1759   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1760 
1761   for (i=0; i<m; i++) {
1762     idx     = a->j + a->i[i];
1763     v       = a->a + a->i[i];
1764     n       = a->i[i+1] - a->i[i];
1765     alpha1  = x[10*i];
1766     alpha2  = x[10*i+1];
1767     alpha3  = x[10*i+2];
1768     alpha4  = x[10*i+3];
1769     alpha5  = x[10*i+4];
1770     alpha6  = x[10*i+5];
1771     alpha7  = x[10*i+6];
1772     alpha8  = x[10*i+7];
1773     alpha9  = x[10*i+8];
1774     alpha10 = x[10*i+9];
1775     while (n-->0) {
1776       y[10*(*idx)]   += alpha1*(*v);
1777       y[10*(*idx)+1] += alpha2*(*v);
1778       y[10*(*idx)+2] += alpha3*(*v);
1779       y[10*(*idx)+3] += alpha4*(*v);
1780       y[10*(*idx)+4] += alpha5*(*v);
1781       y[10*(*idx)+5] += alpha6*(*v);
1782       y[10*(*idx)+6] += alpha7*(*v);
1783       y[10*(*idx)+7] += alpha8*(*v);
1784       y[10*(*idx)+8] += alpha9*(*v);
1785       y[10*(*idx)+9] += alpha10*(*v);
1786       idx++; v++;
1787     }
1788   }
1789   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1790   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1791   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1796 {
1797   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1798   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1799   const PetscScalar *x,*v;
1800   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1801   PetscErrorCode    ierr;
1802   const PetscInt    m = b->AIJ->rmap->n,*idx;
1803   PetscInt          n,i;
1804 
1805   PetscFunctionBegin;
1806   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1807   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1808   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1809   for (i=0; i<m; i++) {
1810     idx     = a->j + a->i[i];
1811     v       = a->a + a->i[i];
1812     n       = a->i[i+1] - a->i[i];
1813     alpha1  = x[10*i];
1814     alpha2  = x[10*i+1];
1815     alpha3  = x[10*i+2];
1816     alpha4  = x[10*i+3];
1817     alpha5  = x[10*i+4];
1818     alpha6  = x[10*i+5];
1819     alpha7  = x[10*i+6];
1820     alpha8  = x[10*i+7];
1821     alpha9  = x[10*i+8];
1822     alpha10 = x[10*i+9];
1823     while (n-->0) {
1824       y[10*(*idx)]   += alpha1*(*v);
1825       y[10*(*idx)+1] += alpha2*(*v);
1826       y[10*(*idx)+2] += alpha3*(*v);
1827       y[10*(*idx)+3] += alpha4*(*v);
1828       y[10*(*idx)+4] += alpha5*(*v);
1829       y[10*(*idx)+5] += alpha6*(*v);
1830       y[10*(*idx)+6] += alpha7*(*v);
1831       y[10*(*idx)+7] += alpha8*(*v);
1832       y[10*(*idx)+8] += alpha9*(*v);
1833       y[10*(*idx)+9] += alpha10*(*v);
1834       idx++; v++;
1835     }
1836   }
1837   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1838   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1839   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 
1844 /*--------------------------------------------------------------------------------------------*/
1845 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1846 {
1847   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1848   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1849   const PetscScalar *x,*v;
1850   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1851   PetscErrorCode    ierr;
1852   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1853   PetscInt          nonzerorow=0,n,i,jrow,j;
1854 
1855   PetscFunctionBegin;
1856   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1857   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1858   idx  = a->j;
1859   v    = a->a;
1860   ii   = a->i;
1861 
1862   for (i=0; i<m; i++) {
1863     jrow  = ii[i];
1864     n     = ii[i+1] - jrow;
1865     sum1  = 0.0;
1866     sum2  = 0.0;
1867     sum3  = 0.0;
1868     sum4  = 0.0;
1869     sum5  = 0.0;
1870     sum6  = 0.0;
1871     sum7  = 0.0;
1872     sum8  = 0.0;
1873     sum9  = 0.0;
1874     sum10 = 0.0;
1875     sum11 = 0.0;
1876 
1877     nonzerorow += (n>0);
1878     for (j=0; j<n; j++) {
1879       sum1  += v[jrow]*x[11*idx[jrow]];
1880       sum2  += v[jrow]*x[11*idx[jrow]+1];
1881       sum3  += v[jrow]*x[11*idx[jrow]+2];
1882       sum4  += v[jrow]*x[11*idx[jrow]+3];
1883       sum5  += v[jrow]*x[11*idx[jrow]+4];
1884       sum6  += v[jrow]*x[11*idx[jrow]+5];
1885       sum7  += v[jrow]*x[11*idx[jrow]+6];
1886       sum8  += v[jrow]*x[11*idx[jrow]+7];
1887       sum9  += v[jrow]*x[11*idx[jrow]+8];
1888       sum10 += v[jrow]*x[11*idx[jrow]+9];
1889       sum11 += v[jrow]*x[11*idx[jrow]+10];
1890       jrow++;
1891     }
1892     y[11*i]    = sum1;
1893     y[11*i+1]  = sum2;
1894     y[11*i+2]  = sum3;
1895     y[11*i+3]  = sum4;
1896     y[11*i+4]  = sum5;
1897     y[11*i+5]  = sum6;
1898     y[11*i+6]  = sum7;
1899     y[11*i+7]  = sum8;
1900     y[11*i+8]  = sum9;
1901     y[11*i+9]  = sum10;
1902     y[11*i+10] = sum11;
1903   }
1904 
1905   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1906   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1907   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1912 {
1913   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1914   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1915   const PetscScalar *x,*v;
1916   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1917   PetscErrorCode    ierr;
1918   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1919   PetscInt          n,i,jrow,j;
1920 
1921   PetscFunctionBegin;
1922   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1923   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1924   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1925   idx  = a->j;
1926   v    = a->a;
1927   ii   = a->i;
1928 
1929   for (i=0; i<m; i++) {
1930     jrow  = ii[i];
1931     n     = ii[i+1] - jrow;
1932     sum1  = 0.0;
1933     sum2  = 0.0;
1934     sum3  = 0.0;
1935     sum4  = 0.0;
1936     sum5  = 0.0;
1937     sum6  = 0.0;
1938     sum7  = 0.0;
1939     sum8  = 0.0;
1940     sum9  = 0.0;
1941     sum10 = 0.0;
1942     sum11 = 0.0;
1943     for (j=0; j<n; j++) {
1944       sum1  += v[jrow]*x[11*idx[jrow]];
1945       sum2  += v[jrow]*x[11*idx[jrow]+1];
1946       sum3  += v[jrow]*x[11*idx[jrow]+2];
1947       sum4  += v[jrow]*x[11*idx[jrow]+3];
1948       sum5  += v[jrow]*x[11*idx[jrow]+4];
1949       sum6  += v[jrow]*x[11*idx[jrow]+5];
1950       sum7  += v[jrow]*x[11*idx[jrow]+6];
1951       sum8  += v[jrow]*x[11*idx[jrow]+7];
1952       sum9  += v[jrow]*x[11*idx[jrow]+8];
1953       sum10 += v[jrow]*x[11*idx[jrow]+9];
1954       sum11 += v[jrow]*x[11*idx[jrow]+10];
1955       jrow++;
1956     }
1957     y[11*i]    += sum1;
1958     y[11*i+1]  += sum2;
1959     y[11*i+2]  += sum3;
1960     y[11*i+3]  += sum4;
1961     y[11*i+4]  += sum5;
1962     y[11*i+5]  += sum6;
1963     y[11*i+6]  += sum7;
1964     y[11*i+7]  += sum8;
1965     y[11*i+8]  += sum9;
1966     y[11*i+9]  += sum10;
1967     y[11*i+10] += sum11;
1968   }
1969 
1970   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
1971   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1972   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1977 {
1978   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1979   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1980   const PetscScalar *x,*v;
1981   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1982   PetscErrorCode    ierr;
1983   const PetscInt    m = b->AIJ->rmap->n,*idx;
1984   PetscInt          n,i;
1985 
1986   PetscFunctionBegin;
1987   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1988   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1989   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1990 
1991   for (i=0; i<m; i++) {
1992     idx     = a->j + a->i[i];
1993     v       = a->a + a->i[i];
1994     n       = a->i[i+1] - a->i[i];
1995     alpha1  = x[11*i];
1996     alpha2  = x[11*i+1];
1997     alpha3  = x[11*i+2];
1998     alpha4  = x[11*i+3];
1999     alpha5  = x[11*i+4];
2000     alpha6  = x[11*i+5];
2001     alpha7  = x[11*i+6];
2002     alpha8  = x[11*i+7];
2003     alpha9  = x[11*i+8];
2004     alpha10 = x[11*i+9];
2005     alpha11 = x[11*i+10];
2006     while (n-->0) {
2007       y[11*(*idx)]    += alpha1*(*v);
2008       y[11*(*idx)+1]  += alpha2*(*v);
2009       y[11*(*idx)+2]  += alpha3*(*v);
2010       y[11*(*idx)+3]  += alpha4*(*v);
2011       y[11*(*idx)+4]  += alpha5*(*v);
2012       y[11*(*idx)+5]  += alpha6*(*v);
2013       y[11*(*idx)+6]  += alpha7*(*v);
2014       y[11*(*idx)+7]  += alpha8*(*v);
2015       y[11*(*idx)+8]  += alpha9*(*v);
2016       y[11*(*idx)+9]  += alpha10*(*v);
2017       y[11*(*idx)+10] += alpha11*(*v);
2018       idx++; v++;
2019     }
2020   }
2021   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2022   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2023   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2028 {
2029   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2030   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2031   const PetscScalar *x,*v;
2032   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2033   PetscErrorCode    ierr;
2034   const PetscInt    m = b->AIJ->rmap->n,*idx;
2035   PetscInt          n,i;
2036 
2037   PetscFunctionBegin;
2038   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2039   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2040   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2041   for (i=0; i<m; i++) {
2042     idx     = a->j + a->i[i];
2043     v       = a->a + a->i[i];
2044     n       = a->i[i+1] - a->i[i];
2045     alpha1  = x[11*i];
2046     alpha2  = x[11*i+1];
2047     alpha3  = x[11*i+2];
2048     alpha4  = x[11*i+3];
2049     alpha5  = x[11*i+4];
2050     alpha6  = x[11*i+5];
2051     alpha7  = x[11*i+6];
2052     alpha8  = x[11*i+7];
2053     alpha9  = x[11*i+8];
2054     alpha10 = x[11*i+9];
2055     alpha11 = x[11*i+10];
2056     while (n-->0) {
2057       y[11*(*idx)]    += alpha1*(*v);
2058       y[11*(*idx)+1]  += alpha2*(*v);
2059       y[11*(*idx)+2]  += alpha3*(*v);
2060       y[11*(*idx)+3]  += alpha4*(*v);
2061       y[11*(*idx)+4]  += alpha5*(*v);
2062       y[11*(*idx)+5]  += alpha6*(*v);
2063       y[11*(*idx)+6]  += alpha7*(*v);
2064       y[11*(*idx)+7]  += alpha8*(*v);
2065       y[11*(*idx)+8]  += alpha9*(*v);
2066       y[11*(*idx)+9]  += alpha10*(*v);
2067       y[11*(*idx)+10] += alpha11*(*v);
2068       idx++; v++;
2069     }
2070   }
2071   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2072   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2073   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 
2078 /*--------------------------------------------------------------------------------------------*/
2079 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2080 {
2081   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2082   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2083   const PetscScalar *x,*v;
2084   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2085   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2086   PetscErrorCode    ierr;
2087   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2088   PetscInt          nonzerorow=0,n,i,jrow,j;
2089 
2090   PetscFunctionBegin;
2091   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2092   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2093   idx  = a->j;
2094   v    = a->a;
2095   ii   = a->i;
2096 
2097   for (i=0; i<m; i++) {
2098     jrow  = ii[i];
2099     n     = ii[i+1] - jrow;
2100     sum1  = 0.0;
2101     sum2  = 0.0;
2102     sum3  = 0.0;
2103     sum4  = 0.0;
2104     sum5  = 0.0;
2105     sum6  = 0.0;
2106     sum7  = 0.0;
2107     sum8  = 0.0;
2108     sum9  = 0.0;
2109     sum10 = 0.0;
2110     sum11 = 0.0;
2111     sum12 = 0.0;
2112     sum13 = 0.0;
2113     sum14 = 0.0;
2114     sum15 = 0.0;
2115     sum16 = 0.0;
2116 
2117     nonzerorow += (n>0);
2118     for (j=0; j<n; j++) {
2119       sum1  += v[jrow]*x[16*idx[jrow]];
2120       sum2  += v[jrow]*x[16*idx[jrow]+1];
2121       sum3  += v[jrow]*x[16*idx[jrow]+2];
2122       sum4  += v[jrow]*x[16*idx[jrow]+3];
2123       sum5  += v[jrow]*x[16*idx[jrow]+4];
2124       sum6  += v[jrow]*x[16*idx[jrow]+5];
2125       sum7  += v[jrow]*x[16*idx[jrow]+6];
2126       sum8  += v[jrow]*x[16*idx[jrow]+7];
2127       sum9  += v[jrow]*x[16*idx[jrow]+8];
2128       sum10 += v[jrow]*x[16*idx[jrow]+9];
2129       sum11 += v[jrow]*x[16*idx[jrow]+10];
2130       sum12 += v[jrow]*x[16*idx[jrow]+11];
2131       sum13 += v[jrow]*x[16*idx[jrow]+12];
2132       sum14 += v[jrow]*x[16*idx[jrow]+13];
2133       sum15 += v[jrow]*x[16*idx[jrow]+14];
2134       sum16 += v[jrow]*x[16*idx[jrow]+15];
2135       jrow++;
2136     }
2137     y[16*i]    = sum1;
2138     y[16*i+1]  = sum2;
2139     y[16*i+2]  = sum3;
2140     y[16*i+3]  = sum4;
2141     y[16*i+4]  = sum5;
2142     y[16*i+5]  = sum6;
2143     y[16*i+6]  = sum7;
2144     y[16*i+7]  = sum8;
2145     y[16*i+8]  = sum9;
2146     y[16*i+9]  = sum10;
2147     y[16*i+10] = sum11;
2148     y[16*i+11] = sum12;
2149     y[16*i+12] = sum13;
2150     y[16*i+13] = sum14;
2151     y[16*i+14] = sum15;
2152     y[16*i+15] = sum16;
2153   }
2154 
2155   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2156   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2157   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2162 {
2163   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2164   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2165   const PetscScalar *x,*v;
2166   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2167   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2168   PetscErrorCode    ierr;
2169   const PetscInt    m = b->AIJ->rmap->n,*idx;
2170   PetscInt          n,i;
2171 
2172   PetscFunctionBegin;
2173   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2174   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2175   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2176 
2177   for (i=0; i<m; i++) {
2178     idx     = a->j + a->i[i];
2179     v       = a->a + a->i[i];
2180     n       = a->i[i+1] - a->i[i];
2181     alpha1  = x[16*i];
2182     alpha2  = x[16*i+1];
2183     alpha3  = x[16*i+2];
2184     alpha4  = x[16*i+3];
2185     alpha5  = x[16*i+4];
2186     alpha6  = x[16*i+5];
2187     alpha7  = x[16*i+6];
2188     alpha8  = x[16*i+7];
2189     alpha9  = x[16*i+8];
2190     alpha10 = x[16*i+9];
2191     alpha11 = x[16*i+10];
2192     alpha12 = x[16*i+11];
2193     alpha13 = x[16*i+12];
2194     alpha14 = x[16*i+13];
2195     alpha15 = x[16*i+14];
2196     alpha16 = x[16*i+15];
2197     while (n-->0) {
2198       y[16*(*idx)]    += alpha1*(*v);
2199       y[16*(*idx)+1]  += alpha2*(*v);
2200       y[16*(*idx)+2]  += alpha3*(*v);
2201       y[16*(*idx)+3]  += alpha4*(*v);
2202       y[16*(*idx)+4]  += alpha5*(*v);
2203       y[16*(*idx)+5]  += alpha6*(*v);
2204       y[16*(*idx)+6]  += alpha7*(*v);
2205       y[16*(*idx)+7]  += alpha8*(*v);
2206       y[16*(*idx)+8]  += alpha9*(*v);
2207       y[16*(*idx)+9]  += alpha10*(*v);
2208       y[16*(*idx)+10] += alpha11*(*v);
2209       y[16*(*idx)+11] += alpha12*(*v);
2210       y[16*(*idx)+12] += alpha13*(*v);
2211       y[16*(*idx)+13] += alpha14*(*v);
2212       y[16*(*idx)+14] += alpha15*(*v);
2213       y[16*(*idx)+15] += alpha16*(*v);
2214       idx++; v++;
2215     }
2216   }
2217   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2218   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2219   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2224 {
2225   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2226   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2227   const PetscScalar *x,*v;
2228   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2229   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2230   PetscErrorCode    ierr;
2231   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2232   PetscInt          n,i,jrow,j;
2233 
2234   PetscFunctionBegin;
2235   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2236   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2237   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2238   idx  = a->j;
2239   v    = a->a;
2240   ii   = a->i;
2241 
2242   for (i=0; i<m; i++) {
2243     jrow  = ii[i];
2244     n     = ii[i+1] - jrow;
2245     sum1  = 0.0;
2246     sum2  = 0.0;
2247     sum3  = 0.0;
2248     sum4  = 0.0;
2249     sum5  = 0.0;
2250     sum6  = 0.0;
2251     sum7  = 0.0;
2252     sum8  = 0.0;
2253     sum9  = 0.0;
2254     sum10 = 0.0;
2255     sum11 = 0.0;
2256     sum12 = 0.0;
2257     sum13 = 0.0;
2258     sum14 = 0.0;
2259     sum15 = 0.0;
2260     sum16 = 0.0;
2261     for (j=0; j<n; j++) {
2262       sum1  += v[jrow]*x[16*idx[jrow]];
2263       sum2  += v[jrow]*x[16*idx[jrow]+1];
2264       sum3  += v[jrow]*x[16*idx[jrow]+2];
2265       sum4  += v[jrow]*x[16*idx[jrow]+3];
2266       sum5  += v[jrow]*x[16*idx[jrow]+4];
2267       sum6  += v[jrow]*x[16*idx[jrow]+5];
2268       sum7  += v[jrow]*x[16*idx[jrow]+6];
2269       sum8  += v[jrow]*x[16*idx[jrow]+7];
2270       sum9  += v[jrow]*x[16*idx[jrow]+8];
2271       sum10 += v[jrow]*x[16*idx[jrow]+9];
2272       sum11 += v[jrow]*x[16*idx[jrow]+10];
2273       sum12 += v[jrow]*x[16*idx[jrow]+11];
2274       sum13 += v[jrow]*x[16*idx[jrow]+12];
2275       sum14 += v[jrow]*x[16*idx[jrow]+13];
2276       sum15 += v[jrow]*x[16*idx[jrow]+14];
2277       sum16 += v[jrow]*x[16*idx[jrow]+15];
2278       jrow++;
2279     }
2280     y[16*i]    += sum1;
2281     y[16*i+1]  += sum2;
2282     y[16*i+2]  += sum3;
2283     y[16*i+3]  += sum4;
2284     y[16*i+4]  += sum5;
2285     y[16*i+5]  += sum6;
2286     y[16*i+6]  += sum7;
2287     y[16*i+7]  += sum8;
2288     y[16*i+8]  += sum9;
2289     y[16*i+9]  += sum10;
2290     y[16*i+10] += sum11;
2291     y[16*i+11] += sum12;
2292     y[16*i+12] += sum13;
2293     y[16*i+13] += sum14;
2294     y[16*i+14] += sum15;
2295     y[16*i+15] += sum16;
2296   }
2297 
2298   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2299   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2300   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2305 {
2306   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2307   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2308   const PetscScalar *x,*v;
2309   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2310   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2311   PetscErrorCode    ierr;
2312   const PetscInt    m = b->AIJ->rmap->n,*idx;
2313   PetscInt          n,i;
2314 
2315   PetscFunctionBegin;
2316   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2317   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2318   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2319   for (i=0; i<m; i++) {
2320     idx     = a->j + a->i[i];
2321     v       = a->a + a->i[i];
2322     n       = a->i[i+1] - a->i[i];
2323     alpha1  = x[16*i];
2324     alpha2  = x[16*i+1];
2325     alpha3  = x[16*i+2];
2326     alpha4  = x[16*i+3];
2327     alpha5  = x[16*i+4];
2328     alpha6  = x[16*i+5];
2329     alpha7  = x[16*i+6];
2330     alpha8  = x[16*i+7];
2331     alpha9  = x[16*i+8];
2332     alpha10 = x[16*i+9];
2333     alpha11 = x[16*i+10];
2334     alpha12 = x[16*i+11];
2335     alpha13 = x[16*i+12];
2336     alpha14 = x[16*i+13];
2337     alpha15 = x[16*i+14];
2338     alpha16 = x[16*i+15];
2339     while (n-->0) {
2340       y[16*(*idx)]    += alpha1*(*v);
2341       y[16*(*idx)+1]  += alpha2*(*v);
2342       y[16*(*idx)+2]  += alpha3*(*v);
2343       y[16*(*idx)+3]  += alpha4*(*v);
2344       y[16*(*idx)+4]  += alpha5*(*v);
2345       y[16*(*idx)+5]  += alpha6*(*v);
2346       y[16*(*idx)+6]  += alpha7*(*v);
2347       y[16*(*idx)+7]  += alpha8*(*v);
2348       y[16*(*idx)+8]  += alpha9*(*v);
2349       y[16*(*idx)+9]  += alpha10*(*v);
2350       y[16*(*idx)+10] += alpha11*(*v);
2351       y[16*(*idx)+11] += alpha12*(*v);
2352       y[16*(*idx)+12] += alpha13*(*v);
2353       y[16*(*idx)+13] += alpha14*(*v);
2354       y[16*(*idx)+14] += alpha15*(*v);
2355       y[16*(*idx)+15] += alpha16*(*v);
2356       idx++; v++;
2357     }
2358   }
2359   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2360   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2361   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 /*--------------------------------------------------------------------------------------------*/
2366 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2367 {
2368   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2369   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2370   const PetscScalar *x,*v;
2371   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2372   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2373   PetscErrorCode    ierr;
2374   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2375   PetscInt          nonzerorow=0,n,i,jrow,j;
2376 
2377   PetscFunctionBegin;
2378   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2379   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2380   idx  = a->j;
2381   v    = a->a;
2382   ii   = a->i;
2383 
2384   for (i=0; i<m; i++) {
2385     jrow  = ii[i];
2386     n     = ii[i+1] - jrow;
2387     sum1  = 0.0;
2388     sum2  = 0.0;
2389     sum3  = 0.0;
2390     sum4  = 0.0;
2391     sum5  = 0.0;
2392     sum6  = 0.0;
2393     sum7  = 0.0;
2394     sum8  = 0.0;
2395     sum9  = 0.0;
2396     sum10 = 0.0;
2397     sum11 = 0.0;
2398     sum12 = 0.0;
2399     sum13 = 0.0;
2400     sum14 = 0.0;
2401     sum15 = 0.0;
2402     sum16 = 0.0;
2403     sum17 = 0.0;
2404     sum18 = 0.0;
2405 
2406     nonzerorow += (n>0);
2407     for (j=0; j<n; j++) {
2408       sum1  += v[jrow]*x[18*idx[jrow]];
2409       sum2  += v[jrow]*x[18*idx[jrow]+1];
2410       sum3  += v[jrow]*x[18*idx[jrow]+2];
2411       sum4  += v[jrow]*x[18*idx[jrow]+3];
2412       sum5  += v[jrow]*x[18*idx[jrow]+4];
2413       sum6  += v[jrow]*x[18*idx[jrow]+5];
2414       sum7  += v[jrow]*x[18*idx[jrow]+6];
2415       sum8  += v[jrow]*x[18*idx[jrow]+7];
2416       sum9  += v[jrow]*x[18*idx[jrow]+8];
2417       sum10 += v[jrow]*x[18*idx[jrow]+9];
2418       sum11 += v[jrow]*x[18*idx[jrow]+10];
2419       sum12 += v[jrow]*x[18*idx[jrow]+11];
2420       sum13 += v[jrow]*x[18*idx[jrow]+12];
2421       sum14 += v[jrow]*x[18*idx[jrow]+13];
2422       sum15 += v[jrow]*x[18*idx[jrow]+14];
2423       sum16 += v[jrow]*x[18*idx[jrow]+15];
2424       sum17 += v[jrow]*x[18*idx[jrow]+16];
2425       sum18 += v[jrow]*x[18*idx[jrow]+17];
2426       jrow++;
2427     }
2428     y[18*i]    = sum1;
2429     y[18*i+1]  = sum2;
2430     y[18*i+2]  = sum3;
2431     y[18*i+3]  = sum4;
2432     y[18*i+4]  = sum5;
2433     y[18*i+5]  = sum6;
2434     y[18*i+6]  = sum7;
2435     y[18*i+7]  = sum8;
2436     y[18*i+8]  = sum9;
2437     y[18*i+9]  = sum10;
2438     y[18*i+10] = sum11;
2439     y[18*i+11] = sum12;
2440     y[18*i+12] = sum13;
2441     y[18*i+13] = sum14;
2442     y[18*i+14] = sum15;
2443     y[18*i+15] = sum16;
2444     y[18*i+16] = sum17;
2445     y[18*i+17] = sum18;
2446   }
2447 
2448   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2449   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2450   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2455 {
2456   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2457   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2458   const PetscScalar *x,*v;
2459   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2460   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2461   PetscErrorCode    ierr;
2462   const PetscInt    m = b->AIJ->rmap->n,*idx;
2463   PetscInt          n,i;
2464 
2465   PetscFunctionBegin;
2466   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2467   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2468   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2469 
2470   for (i=0; i<m; i++) {
2471     idx     = a->j + a->i[i];
2472     v       = a->a + a->i[i];
2473     n       = a->i[i+1] - a->i[i];
2474     alpha1  = x[18*i];
2475     alpha2  = x[18*i+1];
2476     alpha3  = x[18*i+2];
2477     alpha4  = x[18*i+3];
2478     alpha5  = x[18*i+4];
2479     alpha6  = x[18*i+5];
2480     alpha7  = x[18*i+6];
2481     alpha8  = x[18*i+7];
2482     alpha9  = x[18*i+8];
2483     alpha10 = x[18*i+9];
2484     alpha11 = x[18*i+10];
2485     alpha12 = x[18*i+11];
2486     alpha13 = x[18*i+12];
2487     alpha14 = x[18*i+13];
2488     alpha15 = x[18*i+14];
2489     alpha16 = x[18*i+15];
2490     alpha17 = x[18*i+16];
2491     alpha18 = x[18*i+17];
2492     while (n-->0) {
2493       y[18*(*idx)]    += alpha1*(*v);
2494       y[18*(*idx)+1]  += alpha2*(*v);
2495       y[18*(*idx)+2]  += alpha3*(*v);
2496       y[18*(*idx)+3]  += alpha4*(*v);
2497       y[18*(*idx)+4]  += alpha5*(*v);
2498       y[18*(*idx)+5]  += alpha6*(*v);
2499       y[18*(*idx)+6]  += alpha7*(*v);
2500       y[18*(*idx)+7]  += alpha8*(*v);
2501       y[18*(*idx)+8]  += alpha9*(*v);
2502       y[18*(*idx)+9]  += alpha10*(*v);
2503       y[18*(*idx)+10] += alpha11*(*v);
2504       y[18*(*idx)+11] += alpha12*(*v);
2505       y[18*(*idx)+12] += alpha13*(*v);
2506       y[18*(*idx)+13] += alpha14*(*v);
2507       y[18*(*idx)+14] += alpha15*(*v);
2508       y[18*(*idx)+15] += alpha16*(*v);
2509       y[18*(*idx)+16] += alpha17*(*v);
2510       y[18*(*idx)+17] += alpha18*(*v);
2511       idx++; v++;
2512     }
2513   }
2514   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2515   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2516   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2521 {
2522   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2523   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2524   const PetscScalar *x,*v;
2525   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2526   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2527   PetscErrorCode    ierr;
2528   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2529   PetscInt          n,i,jrow,j;
2530 
2531   PetscFunctionBegin;
2532   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2533   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2534   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2535   idx  = a->j;
2536   v    = a->a;
2537   ii   = a->i;
2538 
2539   for (i=0; i<m; i++) {
2540     jrow  = ii[i];
2541     n     = ii[i+1] - jrow;
2542     sum1  = 0.0;
2543     sum2  = 0.0;
2544     sum3  = 0.0;
2545     sum4  = 0.0;
2546     sum5  = 0.0;
2547     sum6  = 0.0;
2548     sum7  = 0.0;
2549     sum8  = 0.0;
2550     sum9  = 0.0;
2551     sum10 = 0.0;
2552     sum11 = 0.0;
2553     sum12 = 0.0;
2554     sum13 = 0.0;
2555     sum14 = 0.0;
2556     sum15 = 0.0;
2557     sum16 = 0.0;
2558     sum17 = 0.0;
2559     sum18 = 0.0;
2560     for (j=0; j<n; j++) {
2561       sum1  += v[jrow]*x[18*idx[jrow]];
2562       sum2  += v[jrow]*x[18*idx[jrow]+1];
2563       sum3  += v[jrow]*x[18*idx[jrow]+2];
2564       sum4  += v[jrow]*x[18*idx[jrow]+3];
2565       sum5  += v[jrow]*x[18*idx[jrow]+4];
2566       sum6  += v[jrow]*x[18*idx[jrow]+5];
2567       sum7  += v[jrow]*x[18*idx[jrow]+6];
2568       sum8  += v[jrow]*x[18*idx[jrow]+7];
2569       sum9  += v[jrow]*x[18*idx[jrow]+8];
2570       sum10 += v[jrow]*x[18*idx[jrow]+9];
2571       sum11 += v[jrow]*x[18*idx[jrow]+10];
2572       sum12 += v[jrow]*x[18*idx[jrow]+11];
2573       sum13 += v[jrow]*x[18*idx[jrow]+12];
2574       sum14 += v[jrow]*x[18*idx[jrow]+13];
2575       sum15 += v[jrow]*x[18*idx[jrow]+14];
2576       sum16 += v[jrow]*x[18*idx[jrow]+15];
2577       sum17 += v[jrow]*x[18*idx[jrow]+16];
2578       sum18 += v[jrow]*x[18*idx[jrow]+17];
2579       jrow++;
2580     }
2581     y[18*i]    += sum1;
2582     y[18*i+1]  += sum2;
2583     y[18*i+2]  += sum3;
2584     y[18*i+3]  += sum4;
2585     y[18*i+4]  += sum5;
2586     y[18*i+5]  += sum6;
2587     y[18*i+6]  += sum7;
2588     y[18*i+7]  += sum8;
2589     y[18*i+8]  += sum9;
2590     y[18*i+9]  += sum10;
2591     y[18*i+10] += sum11;
2592     y[18*i+11] += sum12;
2593     y[18*i+12] += sum13;
2594     y[18*i+13] += sum14;
2595     y[18*i+14] += sum15;
2596     y[18*i+15] += sum16;
2597     y[18*i+16] += sum17;
2598     y[18*i+17] += sum18;
2599   }
2600 
2601   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2602   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2603   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608 {
2609   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2610   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2611   const PetscScalar *x,*v;
2612   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2613   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2614   PetscErrorCode    ierr;
2615   const PetscInt    m = b->AIJ->rmap->n,*idx;
2616   PetscInt          n,i;
2617 
2618   PetscFunctionBegin;
2619   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2620   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2621   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2622   for (i=0; i<m; i++) {
2623     idx     = a->j + a->i[i];
2624     v       = a->a + a->i[i];
2625     n       = a->i[i+1] - a->i[i];
2626     alpha1  = x[18*i];
2627     alpha2  = x[18*i+1];
2628     alpha3  = x[18*i+2];
2629     alpha4  = x[18*i+3];
2630     alpha5  = x[18*i+4];
2631     alpha6  = x[18*i+5];
2632     alpha7  = x[18*i+6];
2633     alpha8  = x[18*i+7];
2634     alpha9  = x[18*i+8];
2635     alpha10 = x[18*i+9];
2636     alpha11 = x[18*i+10];
2637     alpha12 = x[18*i+11];
2638     alpha13 = x[18*i+12];
2639     alpha14 = x[18*i+13];
2640     alpha15 = x[18*i+14];
2641     alpha16 = x[18*i+15];
2642     alpha17 = x[18*i+16];
2643     alpha18 = x[18*i+17];
2644     while (n-->0) {
2645       y[18*(*idx)]    += alpha1*(*v);
2646       y[18*(*idx)+1]  += alpha2*(*v);
2647       y[18*(*idx)+2]  += alpha3*(*v);
2648       y[18*(*idx)+3]  += alpha4*(*v);
2649       y[18*(*idx)+4]  += alpha5*(*v);
2650       y[18*(*idx)+5]  += alpha6*(*v);
2651       y[18*(*idx)+6]  += alpha7*(*v);
2652       y[18*(*idx)+7]  += alpha8*(*v);
2653       y[18*(*idx)+8]  += alpha9*(*v);
2654       y[18*(*idx)+9]  += alpha10*(*v);
2655       y[18*(*idx)+10] += alpha11*(*v);
2656       y[18*(*idx)+11] += alpha12*(*v);
2657       y[18*(*idx)+12] += alpha13*(*v);
2658       y[18*(*idx)+13] += alpha14*(*v);
2659       y[18*(*idx)+14] += alpha15*(*v);
2660       y[18*(*idx)+15] += alpha16*(*v);
2661       y[18*(*idx)+16] += alpha17*(*v);
2662       y[18*(*idx)+17] += alpha18*(*v);
2663       idx++; v++;
2664     }
2665   }
2666   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2667   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2668   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2669   PetscFunctionReturn(0);
2670 }
2671 
2672 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2673 {
2674   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2675   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2676   const PetscScalar *x,*v;
2677   PetscScalar       *y,*sums;
2678   PetscErrorCode    ierr;
2679   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2680   PetscInt          n,i,jrow,j,dof = b->dof,k;
2681 
2682   PetscFunctionBegin;
2683   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2684   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2685   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2686   idx  = a->j;
2687   v    = a->a;
2688   ii   = a->i;
2689 
2690   for (i=0; i<m; i++) {
2691     jrow = ii[i];
2692     n    = ii[i+1] - jrow;
2693     sums = y + dof*i;
2694     for (j=0; j<n; j++) {
2695       for (k=0; k<dof; k++) {
2696         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2697       }
2698       jrow++;
2699     }
2700   }
2701 
2702   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2703   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2704   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2705   PetscFunctionReturn(0);
2706 }
2707 
2708 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2709 {
2710   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2711   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2712   const PetscScalar *x,*v;
2713   PetscScalar       *y,*sums;
2714   PetscErrorCode    ierr;
2715   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2716   PetscInt          n,i,jrow,j,dof = b->dof,k;
2717 
2718   PetscFunctionBegin;
2719   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2720   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2721   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2722   idx  = a->j;
2723   v    = a->a;
2724   ii   = a->i;
2725 
2726   for (i=0; i<m; i++) {
2727     jrow = ii[i];
2728     n    = ii[i+1] - jrow;
2729     sums = y + dof*i;
2730     for (j=0; j<n; j++) {
2731       for (k=0; k<dof; k++) {
2732         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2733       }
2734       jrow++;
2735     }
2736   }
2737 
2738   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2739   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2740   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2745 {
2746   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2747   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2748   const PetscScalar *x,*v,*alpha;
2749   PetscScalar       *y;
2750   PetscErrorCode    ierr;
2751   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2752   PetscInt          n,i,k;
2753 
2754   PetscFunctionBegin;
2755   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2756   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2757   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2758   for (i=0; i<m; i++) {
2759     idx   = a->j + a->i[i];
2760     v     = a->a + a->i[i];
2761     n     = a->i[i+1] - a->i[i];
2762     alpha = x + dof*i;
2763     while (n-->0) {
2764       for (k=0; k<dof; k++) {
2765         y[dof*(*idx)+k] += alpha[k]*(*v);
2766       }
2767       idx++; v++;
2768     }
2769   }
2770   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2771   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2772   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2777 {
2778   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2779   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2780   const PetscScalar *x,*v,*alpha;
2781   PetscScalar       *y;
2782   PetscErrorCode    ierr;
2783   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2784   PetscInt          n,i,k;
2785 
2786   PetscFunctionBegin;
2787   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2788   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2789   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2790   for (i=0; i<m; i++) {
2791     idx   = a->j + a->i[i];
2792     v     = a->a + a->i[i];
2793     n     = a->i[i+1] - a->i[i];
2794     alpha = x + dof*i;
2795     while (n-->0) {
2796       for (k=0; k<dof; k++) {
2797         y[dof*(*idx)+k] += alpha[k]*(*v);
2798       }
2799       idx++; v++;
2800     }
2801   }
2802   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2803   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2804   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 /*===================================================================================*/
2809 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2810 {
2811   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   /* start the scatter */
2816   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2817   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2818   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2819   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2824 {
2825   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2826   PetscErrorCode ierr;
2827 
2828   PetscFunctionBegin;
2829   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2830   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2831   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2832   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2837 {
2838   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2839   PetscErrorCode ierr;
2840 
2841   PetscFunctionBegin;
2842   /* start the scatter */
2843   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2844   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2845   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2846   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2851 {
2852   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2853   PetscErrorCode ierr;
2854 
2855   PetscFunctionBegin;
2856   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2857   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2858   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2859   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 /* ----------------------------------------------------------------*/
2864 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2865 {
2866   PetscErrorCode     ierr;
2867   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2868   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2869   Mat                P         =pp->AIJ;
2870   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2871   PetscInt           *pti,*ptj,*ptJ;
2872   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2873   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2874   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2875   MatScalar          *ca;
2876   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2877 
2878   PetscFunctionBegin;
2879   /* Get ij structure of P^T */
2880   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2881 
2882   cn = pn*ppdof;
2883   /* Allocate ci array, arrays for fill computation and */
2884   /* free space for accumulating nonzero column info */
2885   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2886   ci[0] = 0;
2887 
2888   /* Work arrays for rows of P^T*A */
2889   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2890   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2891   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
2892 
2893   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2894   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2895   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2896   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2897   current_space = free_space;
2898 
2899   /* Determine symbolic info for each row of C: */
2900   for (i=0; i<pn; i++) {
2901     ptnzi = pti[i+1] - pti[i];
2902     ptJ   = ptj + pti[i];
2903     for (dof=0; dof<ppdof; dof++) {
2904       ptanzi = 0;
2905       /* Determine symbolic row of PtA: */
2906       for (j=0; j<ptnzi; j++) {
2907         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2908         arow = ptJ[j]*ppdof + dof;
2909         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2910         anzj = ai[arow+1] - ai[arow];
2911         ajj  = aj + ai[arow];
2912         for (k=0; k<anzj; k++) {
2913           if (!ptadenserow[ajj[k]]) {
2914             ptadenserow[ajj[k]]    = -1;
2915             ptasparserow[ptanzi++] = ajj[k];
2916           }
2917         }
2918       }
2919       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2920       ptaj = ptasparserow;
2921       cnzi = 0;
2922       for (j=0; j<ptanzi; j++) {
2923         /* Get offset within block of P */
2924         pshift = *ptaj%ppdof;
2925         /* Get block row of P */
2926         prow = (*ptaj++)/ppdof; /* integer division */
2927         /* P has same number of nonzeros per row as the compressed form */
2928         pnzj = pi[prow+1] - pi[prow];
2929         pjj  = pj + pi[prow];
2930         for (k=0;k<pnzj;k++) {
2931           /* Locations in C are shifted by the offset within the block */
2932           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2933           if (!denserow[pjj[k]*ppdof+pshift]) {
2934             denserow[pjj[k]*ppdof+pshift] = -1;
2935             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2936           }
2937         }
2938       }
2939 
2940       /* sort sparserow */
2941       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2942 
2943       /* If free space is not available, make more free space */
2944       /* Double the amount of total space in the list */
2945       if (current_space->local_remaining<cnzi) {
2946         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
2947       }
2948 
2949       /* Copy data into free space, and zero out denserows */
2950       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2951 
2952       current_space->array           += cnzi;
2953       current_space->local_used      += cnzi;
2954       current_space->local_remaining -= cnzi;
2955 
2956       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2957       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2958 
2959       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2960       /*        For now, we will recompute what is needed. */
2961       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2962     }
2963   }
2964   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2965   /* Allocate space for cj, initialize cj, and */
2966   /* destroy list of free space and other temporary array(s) */
2967   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
2968   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2969   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
2970 
2971   /* Allocate space for ca */
2972   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
2973 
2974   /* put together the new matrix */
2975   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2976   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
2977 
2978   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2979   /* Since these are PETSc arrays, change flags to free them as necessary. */
2980   c          = (Mat_SeqAIJ*)((*C)->data);
2981   c->free_a  = PETSC_TRUE;
2982   c->free_ij = PETSC_TRUE;
2983   c->nonew   = 0;
2984 
2985   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2986 
2987   /* Clean up. */
2988   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2993 {
2994   /* This routine requires testing -- first draft only */
2995   PetscErrorCode  ierr;
2996   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
2997   Mat             P  =pp->AIJ;
2998   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
2999   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3000   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3001   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3002   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3003   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3004   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3005   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3006   MatScalar       *ca=c->a,*caj,*apa;
3007 
3008   PetscFunctionBegin;
3009   /* Allocate temporary array for storage of one row of A*P */
3010   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3011 
3012   /* Clear old values in C */
3013   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3014 
3015   for (i=0; i<am; i++) {
3016     /* Form sparse row of A*P */
3017     anzi  = ai[i+1] - ai[i];
3018     apnzj = 0;
3019     for (j=0; j<anzi; j++) {
3020       /* Get offset within block of P */
3021       pshift = *aj%ppdof;
3022       /* Get block row of P */
3023       prow = *aj++/ppdof;   /* integer division */
3024       pnzj = pi[prow+1] - pi[prow];
3025       pjj  = pj + pi[prow];
3026       paj  = pa + pi[prow];
3027       for (k=0; k<pnzj; k++) {
3028         poffset = pjj[k]*ppdof+pshift;
3029         if (!apjdense[poffset]) {
3030           apjdense[poffset] = -1;
3031           apj[apnzj++]      = poffset;
3032         }
3033         apa[poffset] += (*aa)*paj[k];
3034       }
3035       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3036       aa++;
3037     }
3038 
3039     /* Sort the j index array for quick sparse axpy. */
3040     /* Note: a array does not need sorting as it is in dense storage locations. */
3041     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3042 
3043     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3044     prow    = i/ppdof; /* integer division */
3045     pshift  = i%ppdof;
3046     poffset = pi[prow];
3047     pnzi    = pi[prow+1] - poffset;
3048     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3049     pJ = pj+poffset;
3050     pA = pa+poffset;
3051     for (j=0; j<pnzi; j++) {
3052       crow = (*pJ)*ppdof+pshift;
3053       cjj  = cj + ci[crow];
3054       caj  = ca + ci[crow];
3055       pJ++;
3056       /* Perform sparse axpy operation.  Note cjj includes apj. */
3057       for (k=0,nextap=0; nextap<apnzj; k++) {
3058         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3059       }
3060       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3061       pA++;
3062     }
3063 
3064     /* Zero the current row info for A*P */
3065     for (j=0; j<apnzj; j++) {
3066       apa[apj[j]]      = 0.;
3067       apjdense[apj[j]] = 0;
3068     }
3069   }
3070 
3071   /* Assemble the final matrix and clean up */
3072   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3074   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3079 {
3080   PetscErrorCode ierr;
3081 
3082   PetscFunctionBegin;
3083   if (scall == MAT_INITIAL_MATRIX) {
3084     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3085     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3086     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3087   }
3088   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3089   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3090   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3095 {
3096   PetscErrorCode ierr;
3097 
3098   PetscFunctionBegin;
3099   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3100   ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr);
3101   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3106 {
3107   PetscFunctionBegin;
3108   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3109   PetscFunctionReturn(0);
3110 }
3111 
3112 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3113 {
3114   PetscErrorCode ierr;
3115 
3116   PetscFunctionBegin;
3117   if (scall == MAT_INITIAL_MATRIX) {
3118     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3119     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3120     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3121   }
3122   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3123   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3124   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3129 {
3130   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3131   Mat            a    = b->AIJ,B;
3132   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3133   PetscErrorCode ierr;
3134   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3135   PetscInt       *cols;
3136   PetscScalar    *vals;
3137 
3138   PetscFunctionBegin;
3139   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3140   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3141   for (i=0; i<m; i++) {
3142     nmax = PetscMax(nmax,aij->ilen[i]);
3143     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3144   }
3145   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3146   ierr = PetscFree(ilen);CHKERRQ(ierr);
3147   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3148   ii   = 0;
3149   for (i=0; i<m; i++) {
3150     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3151     for (j=0; j<dof; j++) {
3152       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3153       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3154       ii++;
3155     }
3156     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3157   }
3158   ierr = PetscFree(icols);CHKERRQ(ierr);
3159   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161 
3162   if (reuse == MAT_INPLACE_MATRIX) {
3163     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3164   } else {
3165     *newmat = B;
3166   }
3167   PetscFunctionReturn(0);
3168 }
3169 
3170 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3171 
3172 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3173 {
3174   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3175   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3176   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3177   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3178   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3179   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3180   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3181   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3182   PetscInt       rstart,cstart,*garray,ii,k;
3183   PetscErrorCode ierr;
3184   PetscScalar    *vals,*ovals;
3185 
3186   PetscFunctionBegin;
3187   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3188   for (i=0; i<A->rmap->n/dof; i++) {
3189     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3190     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3191     for (j=0; j<dof; j++) {
3192       dnz[dof*i+j] = AIJ->ilen[i];
3193       onz[dof*i+j] = OAIJ->ilen[i];
3194     }
3195   }
3196   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3197   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3198   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3199 
3200   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3201   rstart = dof*maij->A->rmap->rstart;
3202   cstart = dof*maij->A->cmap->rstart;
3203   garray = mpiaij->garray;
3204 
3205   ii = rstart;
3206   for (i=0; i<A->rmap->n/dof; i++) {
3207     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3208     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3209     for (j=0; j<dof; j++) {
3210       for (k=0; k<ncols; k++) {
3211         icols[k] = cstart + dof*cols[k]+j;
3212       }
3213       for (k=0; k<oncols; k++) {
3214         oicols[k] = dof*garray[ocols[k]]+j;
3215       }
3216       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3217       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3218       ii++;
3219     }
3220     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3221     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3222   }
3223   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3224 
3225   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3226   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3227 
3228   if (reuse == MAT_INPLACE_MATRIX) {
3229     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3230     ((PetscObject)A)->refct = 1;
3231 
3232     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3233 
3234     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3235   } else {
3236     *newmat = B;
3237   }
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3242 {
3243   PetscErrorCode ierr;
3244   Mat            A;
3245 
3246   PetscFunctionBegin;
3247   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3248   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3249   ierr = MatDestroy(&A);CHKERRQ(ierr);
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 /* ---------------------------------------------------------------------------------- */
3254 /*@
3255   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3256   operations for multicomponent problems.  It interpolates each component the same
3257   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3258   and MATMPIAIJ for distributed matrices.
3259 
3260   Collective
3261 
3262   Input Parameters:
3263 + A - the AIJ matrix describing the action on blocks
3264 - dof - the block size (number of components per node)
3265 
3266   Output Parameter:
3267 . maij - the new MAIJ matrix
3268 
3269   Operations provided:
3270 + MatMult
3271 . MatMultTranspose
3272 . MatMultAdd
3273 . MatMultTransposeAdd
3274 - MatView
3275 
3276   Level: advanced
3277 
3278 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3279 @*/
3280 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3281 {
3282   PetscErrorCode ierr;
3283   PetscMPIInt    size;
3284   PetscInt       n;
3285   Mat            B;
3286 
3287   PetscFunctionBegin;
3288   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3289 
3290   if (dof == 1) *maij = A;
3291   else {
3292     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3293     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3294     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3295     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3296     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3297     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3298 
3299     B->assembled = PETSC_TRUE;
3300 
3301     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3302     if (size == 1) {
3303       Mat_SeqMAIJ *b;
3304 
3305       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3306 
3307       B->ops->setup   = NULL;
3308       B->ops->destroy = MatDestroy_SeqMAIJ;
3309       B->ops->view    = MatView_SeqMAIJ;
3310       b               = (Mat_SeqMAIJ*)B->data;
3311       b->dof          = dof;
3312       b->AIJ          = A;
3313 
3314       if (dof == 2) {
3315         B->ops->mult             = MatMult_SeqMAIJ_2;
3316         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3317         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3318         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3319       } else if (dof == 3) {
3320         B->ops->mult             = MatMult_SeqMAIJ_3;
3321         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3322         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3323         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3324       } else if (dof == 4) {
3325         B->ops->mult             = MatMult_SeqMAIJ_4;
3326         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3327         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3328         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3329       } else if (dof == 5) {
3330         B->ops->mult             = MatMult_SeqMAIJ_5;
3331         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3332         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3333         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3334       } else if (dof == 6) {
3335         B->ops->mult             = MatMult_SeqMAIJ_6;
3336         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3337         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3338         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3339       } else if (dof == 7) {
3340         B->ops->mult             = MatMult_SeqMAIJ_7;
3341         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3342         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3343         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3344       } else if (dof == 8) {
3345         B->ops->mult             = MatMult_SeqMAIJ_8;
3346         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3347         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3348         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3349       } else if (dof == 9) {
3350         B->ops->mult             = MatMult_SeqMAIJ_9;
3351         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3352         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3353         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3354       } else if (dof == 10) {
3355         B->ops->mult             = MatMult_SeqMAIJ_10;
3356         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3357         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3358         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3359       } else if (dof == 11) {
3360         B->ops->mult             = MatMult_SeqMAIJ_11;
3361         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3362         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3363         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3364       } else if (dof == 16) {
3365         B->ops->mult             = MatMult_SeqMAIJ_16;
3366         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3367         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3368         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3369       } else if (dof == 18) {
3370         B->ops->mult             = MatMult_SeqMAIJ_18;
3371         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3372         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3373         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3374       } else {
3375         B->ops->mult             = MatMult_SeqMAIJ_N;
3376         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3377         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3378         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3379       }
3380       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3381       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3382       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3383       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3384     } else {
3385       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3386       Mat_MPIMAIJ *b;
3387       IS          from,to;
3388       Vec         gvec;
3389 
3390       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3391 
3392       B->ops->setup   = NULL;
3393       B->ops->destroy = MatDestroy_MPIMAIJ;
3394       B->ops->view    = MatView_MPIMAIJ;
3395 
3396       b      = (Mat_MPIMAIJ*)B->data;
3397       b->dof = dof;
3398       b->A   = A;
3399 
3400       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3401       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3402 
3403       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3404       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3405       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3406       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3407       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3408 
3409       /* create two temporary Index sets for build scatter gather */
3410       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3411       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3412 
3413       /* create temporary global vector to generate scatter context */
3414       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3415 
3416       /* generate the scatter context */
3417       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3418 
3419       ierr = ISDestroy(&from);CHKERRQ(ierr);
3420       ierr = ISDestroy(&to);CHKERRQ(ierr);
3421       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3422 
3423       B->ops->mult             = MatMult_MPIMAIJ_dof;
3424       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3425       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3426       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3427 
3428       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3429       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3430     }
3431     B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3432     ierr  = MatSetUp(B);CHKERRQ(ierr);
3433     *maij = B;
3434     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3435   }
3436   PetscFunctionReturn(0);
3437 }
3438