xref: /petsc/src/mat/impls/maij/maij.c (revision f155c23239e6d3f7c7ec79ff00b4f28519d0ce99)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: MatCreateMAIJ()
39 @*/
40 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41 {
42   PetscErrorCode ierr;
43   PetscBool      ismpimaij,isseqmaij;
44 
45   PetscFunctionBegin;
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48   if (ismpimaij) {
49     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50 
51     *B = b->A;
52   } else if (isseqmaij) {
53     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54 
55     *B = b->AIJ;
56   } else {
57     *B = A;
58   }
59   PetscFunctionReturn(0);
60 }
61 
62 /*@
63    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64 
65    Logically Collective
66 
67    Input Parameters:
68 +  A - the MAIJ matrix
69 -  dof - the block size for the new matrix
70 
71    Output Parameter:
72 .  B - the new MAIJ matrix
73 
74    Level: advanced
75 
76 .seealso: MatCreateMAIJ()
77 @*/
78 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79 {
80   PetscErrorCode ierr;
81   Mat            Aij = NULL;
82 
83   PetscFunctionBegin;
84   PetscValidLogicalCollectiveInt(A,dof,2);
85   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91 {
92   PetscErrorCode ierr;
93   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
94 
95   PetscFunctionBegin;
96   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97   ierr = PetscFree(A->data);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
99   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 PetscErrorCode MatSetUp_MAIJ(Mat A)
104 {
105   PetscFunctionBegin;
106   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107 }
108 
109 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110 {
111   PetscErrorCode ierr;
112   Mat            B;
113 
114   PetscFunctionBegin;
115   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
116   ierr = MatView(B,viewer);CHKERRQ(ierr);
117   ierr = MatDestroy(&B);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
122 {
123   PetscErrorCode ierr;
124   Mat            B;
125 
126   PetscFunctionBegin;
127   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
128   ierr = MatView(B,viewer);CHKERRQ(ierr);
129   ierr = MatDestroy(&B);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
134 {
135   PetscErrorCode ierr;
136   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
140   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
141   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
142   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
143   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
144   ierr = PetscFree(A->data);CHKERRQ(ierr);
145   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
147   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 /*MC
152   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
153   multicomponent problems, interpolating or restricting each component the same way independently.
154   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
155 
156   Operations provided:
157 .vb
158     MatMult
159     MatMultTranspose
160     MatMultAdd
161     MatMultTransposeAdd
162 .ve
163 
164   Level: advanced
165 
166 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
167 M*/
168 
169 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
170 {
171   PetscErrorCode ierr;
172   Mat_MPIMAIJ    *b;
173   PetscMPIInt    size;
174 
175   PetscFunctionBegin;
176   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
177   A->data  = (void*)b;
178 
179   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
180 
181   A->ops->setup = MatSetUp_MAIJ;
182 
183   b->AIJ  = NULL;
184   b->dof  = 0;
185   b->OAIJ = NULL;
186   b->ctx  = NULL;
187   b->w    = NULL;
188   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
189   if (size == 1) {
190     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
191   } else {
192     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
193   }
194   A->preallocated  = PETSC_TRUE;
195   A->assembled     = PETSC_TRUE;
196   PetscFunctionReturn(0);
197 }
198 
199 /* --------------------------------------------------------------------------------------*/
200 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
201 {
202   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
203   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
204   const PetscScalar *x,*v;
205   PetscScalar       *y, sum1, sum2;
206   PetscErrorCode    ierr;
207   PetscInt          nonzerorow=0,n,i,jrow,j;
208   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
209 
210   PetscFunctionBegin;
211   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
212   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
213   idx  = a->j;
214   v    = a->a;
215   ii   = a->i;
216 
217   for (i=0; i<m; i++) {
218     jrow  = ii[i];
219     n     = ii[i+1] - jrow;
220     sum1  = 0.0;
221     sum2  = 0.0;
222 
223     nonzerorow += (n>0);
224     for (j=0; j<n; j++) {
225       sum1 += v[jrow]*x[2*idx[jrow]];
226       sum2 += v[jrow]*x[2*idx[jrow]+1];
227       jrow++;
228     }
229     y[2*i]   = sum1;
230     y[2*i+1] = sum2;
231   }
232 
233   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
234   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
240 {
241   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
242   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
243   const PetscScalar *x,*v;
244   PetscScalar       *y,alpha1,alpha2;
245   PetscErrorCode    ierr;
246   PetscInt          n,i;
247   const PetscInt    m = b->AIJ->rmap->n,*idx;
248 
249   PetscFunctionBegin;
250   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
251   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
252   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
253 
254   for (i=0; i<m; i++) {
255     idx    = a->j + a->i[i];
256     v      = a->a + a->i[i];
257     n      = a->i[i+1] - a->i[i];
258     alpha1 = x[2*i];
259     alpha2 = x[2*i+1];
260     while (n-->0) {
261       y[2*(*idx)]   += alpha1*(*v);
262       y[2*(*idx)+1] += alpha2*(*v);
263       idx++; v++;
264     }
265   }
266   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
267   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
268   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
269   PetscFunctionReturn(0);
270 }
271 
272 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
273 {
274   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
275   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
276   const PetscScalar *x,*v;
277   PetscScalar       *y,sum1, sum2;
278   PetscErrorCode    ierr;
279   PetscInt          n,i,jrow,j;
280   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
281 
282   PetscFunctionBegin;
283   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
284   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
285   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
286   idx  = a->j;
287   v    = a->a;
288   ii   = a->i;
289 
290   for (i=0; i<m; i++) {
291     jrow = ii[i];
292     n    = ii[i+1] - jrow;
293     sum1 = 0.0;
294     sum2 = 0.0;
295     for (j=0; j<n; j++) {
296       sum1 += v[jrow]*x[2*idx[jrow]];
297       sum2 += v[jrow]*x[2*idx[jrow]+1];
298       jrow++;
299     }
300     y[2*i]   += sum1;
301     y[2*i+1] += sum2;
302   }
303 
304   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
305   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
306   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
310 {
311   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
312   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
313   const PetscScalar *x,*v;
314   PetscScalar       *y,alpha1,alpha2;
315   PetscErrorCode    ierr;
316   PetscInt          n,i;
317   const PetscInt    m = b->AIJ->rmap->n,*idx;
318 
319   PetscFunctionBegin;
320   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
321   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
322   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
323 
324   for (i=0; i<m; i++) {
325     idx    = a->j + a->i[i];
326     v      = a->a + a->i[i];
327     n      = a->i[i+1] - a->i[i];
328     alpha1 = x[2*i];
329     alpha2 = x[2*i+1];
330     while (n-->0) {
331       y[2*(*idx)]   += alpha1*(*v);
332       y[2*(*idx)+1] += alpha2*(*v);
333       idx++; v++;
334     }
335   }
336   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
337   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
338   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 /* --------------------------------------------------------------------------------------*/
342 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343 {
344   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
345   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
346   const PetscScalar *x,*v;
347   PetscScalar       *y,sum1, sum2, sum3;
348   PetscErrorCode    ierr;
349   PetscInt          nonzerorow=0,n,i,jrow,j;
350   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
351 
352   PetscFunctionBegin;
353   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
354   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
355   idx  = a->j;
356   v    = a->a;
357   ii   = a->i;
358 
359   for (i=0; i<m; i++) {
360     jrow  = ii[i];
361     n     = ii[i+1] - jrow;
362     sum1  = 0.0;
363     sum2  = 0.0;
364     sum3  = 0.0;
365 
366     nonzerorow += (n>0);
367     for (j=0; j<n; j++) {
368       sum1 += v[jrow]*x[3*idx[jrow]];
369       sum2 += v[jrow]*x[3*idx[jrow]+1];
370       sum3 += v[jrow]*x[3*idx[jrow]+2];
371       jrow++;
372      }
373     y[3*i]   = sum1;
374     y[3*i+1] = sum2;
375     y[3*i+2] = sum3;
376   }
377 
378   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
379   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
380   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
381   PetscFunctionReturn(0);
382 }
383 
384 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
385 {
386   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
387   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
388   const PetscScalar *x,*v;
389   PetscScalar       *y,alpha1,alpha2,alpha3;
390   PetscErrorCode    ierr;
391   PetscInt          n,i;
392   const PetscInt    m = b->AIJ->rmap->n,*idx;
393 
394   PetscFunctionBegin;
395   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
396   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
397   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
398 
399   for (i=0; i<m; i++) {
400     idx    = a->j + a->i[i];
401     v      = a->a + a->i[i];
402     n      = a->i[i+1] - a->i[i];
403     alpha1 = x[3*i];
404     alpha2 = x[3*i+1];
405     alpha3 = x[3*i+2];
406     while (n-->0) {
407       y[3*(*idx)]   += alpha1*(*v);
408       y[3*(*idx)+1] += alpha2*(*v);
409       y[3*(*idx)+2] += alpha3*(*v);
410       idx++; v++;
411     }
412   }
413   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
414   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
415   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
420 {
421   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
422   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
423   const PetscScalar *x,*v;
424   PetscScalar       *y,sum1, sum2, sum3;
425   PetscErrorCode    ierr;
426   PetscInt          n,i,jrow,j;
427   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
428 
429   PetscFunctionBegin;
430   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
431   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
432   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
433   idx  = a->j;
434   v    = a->a;
435   ii   = a->i;
436 
437   for (i=0; i<m; i++) {
438     jrow = ii[i];
439     n    = ii[i+1] - jrow;
440     sum1 = 0.0;
441     sum2 = 0.0;
442     sum3 = 0.0;
443     for (j=0; j<n; j++) {
444       sum1 += v[jrow]*x[3*idx[jrow]];
445       sum2 += v[jrow]*x[3*idx[jrow]+1];
446       sum3 += v[jrow]*x[3*idx[jrow]+2];
447       jrow++;
448     }
449     y[3*i]   += sum1;
450     y[3*i+1] += sum2;
451     y[3*i+2] += sum3;
452   }
453 
454   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
455   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
456   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
462   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
463   const PetscScalar *x,*v;
464   PetscScalar       *y,alpha1,alpha2,alpha3;
465   PetscErrorCode    ierr;
466   PetscInt          n,i;
467   const PetscInt    m = b->AIJ->rmap->n,*idx;
468 
469   PetscFunctionBegin;
470   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
471   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
472   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
473   for (i=0; i<m; i++) {
474     idx    = a->j + a->i[i];
475     v      = a->a + a->i[i];
476     n      = a->i[i+1] - a->i[i];
477     alpha1 = x[3*i];
478     alpha2 = x[3*i+1];
479     alpha3 = x[3*i+2];
480     while (n-->0) {
481       y[3*(*idx)]   += alpha1*(*v);
482       y[3*(*idx)+1] += alpha2*(*v);
483       y[3*(*idx)+2] += alpha3*(*v);
484       idx++; v++;
485     }
486   }
487   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
488   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
489   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 /* ------------------------------------------------------------------------------*/
494 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
495 {
496   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
497   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
498   const PetscScalar *x,*v;
499   PetscScalar       *y,sum1, sum2, sum3, sum4;
500   PetscErrorCode    ierr;
501   PetscInt          nonzerorow=0,n,i,jrow,j;
502   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
503 
504   PetscFunctionBegin;
505   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
506   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
507   idx  = a->j;
508   v    = a->a;
509   ii   = a->i;
510 
511   for (i=0; i<m; i++) {
512     jrow        = ii[i];
513     n           = ii[i+1] - jrow;
514     sum1        = 0.0;
515     sum2        = 0.0;
516     sum3        = 0.0;
517     sum4        = 0.0;
518     nonzerorow += (n>0);
519     for (j=0; j<n; j++) {
520       sum1 += v[jrow]*x[4*idx[jrow]];
521       sum2 += v[jrow]*x[4*idx[jrow]+1];
522       sum3 += v[jrow]*x[4*idx[jrow]+2];
523       sum4 += v[jrow]*x[4*idx[jrow]+3];
524       jrow++;
525     }
526     y[4*i]   = sum1;
527     y[4*i+1] = sum2;
528     y[4*i+2] = sum3;
529     y[4*i+3] = sum4;
530   }
531 
532   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
533   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
534   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
535   PetscFunctionReturn(0);
536 }
537 
538 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
539 {
540   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
541   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
542   const PetscScalar *x,*v;
543   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
544   PetscErrorCode    ierr;
545   PetscInt          n,i;
546   const PetscInt    m = b->AIJ->rmap->n,*idx;
547 
548   PetscFunctionBegin;
549   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
550   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
551   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
552   for (i=0; i<m; i++) {
553     idx    = a->j + a->i[i];
554     v      = a->a + a->i[i];
555     n      = a->i[i+1] - a->i[i];
556     alpha1 = x[4*i];
557     alpha2 = x[4*i+1];
558     alpha3 = x[4*i+2];
559     alpha4 = x[4*i+3];
560     while (n-->0) {
561       y[4*(*idx)]   += alpha1*(*v);
562       y[4*(*idx)+1] += alpha2*(*v);
563       y[4*(*idx)+2] += alpha3*(*v);
564       y[4*(*idx)+3] += alpha4*(*v);
565       idx++; v++;
566     }
567   }
568   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
569   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
570   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
575 {
576   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
577   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
578   const PetscScalar *x,*v;
579   PetscScalar       *y,sum1, sum2, sum3, sum4;
580   PetscErrorCode    ierr;
581   PetscInt          n,i,jrow,j;
582   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
583 
584   PetscFunctionBegin;
585   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
586   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
587   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
588   idx  = a->j;
589   v    = a->a;
590   ii   = a->i;
591 
592   for (i=0; i<m; i++) {
593     jrow = ii[i];
594     n    = ii[i+1] - jrow;
595     sum1 = 0.0;
596     sum2 = 0.0;
597     sum3 = 0.0;
598     sum4 = 0.0;
599     for (j=0; j<n; j++) {
600       sum1 += v[jrow]*x[4*idx[jrow]];
601       sum2 += v[jrow]*x[4*idx[jrow]+1];
602       sum3 += v[jrow]*x[4*idx[jrow]+2];
603       sum4 += v[jrow]*x[4*idx[jrow]+3];
604       jrow++;
605     }
606     y[4*i]   += sum1;
607     y[4*i+1] += sum2;
608     y[4*i+2] += sum3;
609     y[4*i+3] += sum4;
610   }
611 
612   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
613   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
614   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
615   PetscFunctionReturn(0);
616 }
617 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
618 {
619   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
620   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
621   const PetscScalar *x,*v;
622   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
623   PetscErrorCode    ierr;
624   PetscInt          n,i;
625   const PetscInt    m = b->AIJ->rmap->n,*idx;
626 
627   PetscFunctionBegin;
628   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
629   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
630   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
631 
632   for (i=0; i<m; i++) {
633     idx    = a->j + a->i[i];
634     v      = a->a + a->i[i];
635     n      = a->i[i+1] - a->i[i];
636     alpha1 = x[4*i];
637     alpha2 = x[4*i+1];
638     alpha3 = x[4*i+2];
639     alpha4 = x[4*i+3];
640     while (n-->0) {
641       y[4*(*idx)]   += alpha1*(*v);
642       y[4*(*idx)+1] += alpha2*(*v);
643       y[4*(*idx)+2] += alpha3*(*v);
644       y[4*(*idx)+3] += alpha4*(*v);
645       idx++; v++;
646     }
647   }
648   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
649   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
650   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 /* ------------------------------------------------------------------------------*/
654 
655 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
656 {
657   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
658   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659   const PetscScalar *x,*v;
660   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
661   PetscErrorCode    ierr;
662   PetscInt          nonzerorow=0,n,i,jrow,j;
663   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
664 
665   PetscFunctionBegin;
666   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
667   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
668   idx  = a->j;
669   v    = a->a;
670   ii   = a->i;
671 
672   for (i=0; i<m; i++) {
673     jrow  = ii[i];
674     n     = ii[i+1] - jrow;
675     sum1  = 0.0;
676     sum2  = 0.0;
677     sum3  = 0.0;
678     sum4  = 0.0;
679     sum5  = 0.0;
680 
681     nonzerorow += (n>0);
682     for (j=0; j<n; j++) {
683       sum1 += v[jrow]*x[5*idx[jrow]];
684       sum2 += v[jrow]*x[5*idx[jrow]+1];
685       sum3 += v[jrow]*x[5*idx[jrow]+2];
686       sum4 += v[jrow]*x[5*idx[jrow]+3];
687       sum5 += v[jrow]*x[5*idx[jrow]+4];
688       jrow++;
689     }
690     y[5*i]   = sum1;
691     y[5*i+1] = sum2;
692     y[5*i+2] = sum3;
693     y[5*i+3] = sum4;
694     y[5*i+4] = sum5;
695   }
696 
697   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
698   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
699   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
704 {
705   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
706   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
707   const PetscScalar *x,*v;
708   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
709   PetscErrorCode    ierr;
710   PetscInt          n,i;
711   const PetscInt    m = b->AIJ->rmap->n,*idx;
712 
713   PetscFunctionBegin;
714   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
715   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
716   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
717 
718   for (i=0; i<m; i++) {
719     idx    = a->j + a->i[i];
720     v      = a->a + a->i[i];
721     n      = a->i[i+1] - a->i[i];
722     alpha1 = x[5*i];
723     alpha2 = x[5*i+1];
724     alpha3 = x[5*i+2];
725     alpha4 = x[5*i+3];
726     alpha5 = x[5*i+4];
727     while (n-->0) {
728       y[5*(*idx)]   += alpha1*(*v);
729       y[5*(*idx)+1] += alpha2*(*v);
730       y[5*(*idx)+2] += alpha3*(*v);
731       y[5*(*idx)+3] += alpha4*(*v);
732       y[5*(*idx)+4] += alpha5*(*v);
733       idx++; v++;
734     }
735   }
736   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
737   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
738   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
743 {
744   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
745   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
746   const PetscScalar *x,*v;
747   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
748   PetscErrorCode    ierr;
749   PetscInt          n,i,jrow,j;
750   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
751 
752   PetscFunctionBegin;
753   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
754   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
755   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
756   idx  = a->j;
757   v    = a->a;
758   ii   = a->i;
759 
760   for (i=0; i<m; i++) {
761     jrow = ii[i];
762     n    = ii[i+1] - jrow;
763     sum1 = 0.0;
764     sum2 = 0.0;
765     sum3 = 0.0;
766     sum4 = 0.0;
767     sum5 = 0.0;
768     for (j=0; j<n; j++) {
769       sum1 += v[jrow]*x[5*idx[jrow]];
770       sum2 += v[jrow]*x[5*idx[jrow]+1];
771       sum3 += v[jrow]*x[5*idx[jrow]+2];
772       sum4 += v[jrow]*x[5*idx[jrow]+3];
773       sum5 += v[jrow]*x[5*idx[jrow]+4];
774       jrow++;
775     }
776     y[5*i]   += sum1;
777     y[5*i+1] += sum2;
778     y[5*i+2] += sum3;
779     y[5*i+3] += sum4;
780     y[5*i+4] += sum5;
781   }
782 
783   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
784   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
785   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
790 {
791   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
792   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
793   const PetscScalar *x,*v;
794   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
795   PetscErrorCode    ierr;
796   PetscInt          n,i;
797   const PetscInt    m = b->AIJ->rmap->n,*idx;
798 
799   PetscFunctionBegin;
800   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
801   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
802   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
803 
804   for (i=0; i<m; i++) {
805     idx    = a->j + a->i[i];
806     v      = a->a + a->i[i];
807     n      = a->i[i+1] - a->i[i];
808     alpha1 = x[5*i];
809     alpha2 = x[5*i+1];
810     alpha3 = x[5*i+2];
811     alpha4 = x[5*i+3];
812     alpha5 = x[5*i+4];
813     while (n-->0) {
814       y[5*(*idx)]   += alpha1*(*v);
815       y[5*(*idx)+1] += alpha2*(*v);
816       y[5*(*idx)+2] += alpha3*(*v);
817       y[5*(*idx)+3] += alpha4*(*v);
818       y[5*(*idx)+4] += alpha5*(*v);
819       idx++; v++;
820     }
821   }
822   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
823   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
824   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
825   PetscFunctionReturn(0);
826 }
827 
828 /* ------------------------------------------------------------------------------*/
829 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
830 {
831   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
832   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
833   const PetscScalar *x,*v;
834   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
835   PetscErrorCode    ierr;
836   PetscInt          nonzerorow=0,n,i,jrow,j;
837   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
838 
839   PetscFunctionBegin;
840   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
841   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
842   idx  = a->j;
843   v    = a->a;
844   ii   = a->i;
845 
846   for (i=0; i<m; i++) {
847     jrow  = ii[i];
848     n     = ii[i+1] - jrow;
849     sum1  = 0.0;
850     sum2  = 0.0;
851     sum3  = 0.0;
852     sum4  = 0.0;
853     sum5  = 0.0;
854     sum6  = 0.0;
855 
856     nonzerorow += (n>0);
857     for (j=0; j<n; j++) {
858       sum1 += v[jrow]*x[6*idx[jrow]];
859       sum2 += v[jrow]*x[6*idx[jrow]+1];
860       sum3 += v[jrow]*x[6*idx[jrow]+2];
861       sum4 += v[jrow]*x[6*idx[jrow]+3];
862       sum5 += v[jrow]*x[6*idx[jrow]+4];
863       sum6 += v[jrow]*x[6*idx[jrow]+5];
864       jrow++;
865     }
866     y[6*i]   = sum1;
867     y[6*i+1] = sum2;
868     y[6*i+2] = sum3;
869     y[6*i+3] = sum4;
870     y[6*i+4] = sum5;
871     y[6*i+5] = sum6;
872   }
873 
874   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
875   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
876   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
881 {
882   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
883   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
884   const PetscScalar *x,*v;
885   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
886   PetscErrorCode    ierr;
887   PetscInt          n,i;
888   const PetscInt    m = b->AIJ->rmap->n,*idx;
889 
890   PetscFunctionBegin;
891   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
892   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
893   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
894 
895   for (i=0; i<m; i++) {
896     idx    = a->j + a->i[i];
897     v      = a->a + a->i[i];
898     n      = a->i[i+1] - a->i[i];
899     alpha1 = x[6*i];
900     alpha2 = x[6*i+1];
901     alpha3 = x[6*i+2];
902     alpha4 = x[6*i+3];
903     alpha5 = x[6*i+4];
904     alpha6 = x[6*i+5];
905     while (n-->0) {
906       y[6*(*idx)]   += alpha1*(*v);
907       y[6*(*idx)+1] += alpha2*(*v);
908       y[6*(*idx)+2] += alpha3*(*v);
909       y[6*(*idx)+3] += alpha4*(*v);
910       y[6*(*idx)+4] += alpha5*(*v);
911       y[6*(*idx)+5] += alpha6*(*v);
912       idx++; v++;
913     }
914   }
915   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
916   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
917   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
918   PetscFunctionReturn(0);
919 }
920 
921 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
922 {
923   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
924   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
925   const PetscScalar *x,*v;
926   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
927   PetscErrorCode    ierr;
928   PetscInt          n,i,jrow,j;
929   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
930 
931   PetscFunctionBegin;
932   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
933   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
934   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
935   idx  = a->j;
936   v    = a->a;
937   ii   = a->i;
938 
939   for (i=0; i<m; i++) {
940     jrow = ii[i];
941     n    = ii[i+1] - jrow;
942     sum1 = 0.0;
943     sum2 = 0.0;
944     sum3 = 0.0;
945     sum4 = 0.0;
946     sum5 = 0.0;
947     sum6 = 0.0;
948     for (j=0; j<n; j++) {
949       sum1 += v[jrow]*x[6*idx[jrow]];
950       sum2 += v[jrow]*x[6*idx[jrow]+1];
951       sum3 += v[jrow]*x[6*idx[jrow]+2];
952       sum4 += v[jrow]*x[6*idx[jrow]+3];
953       sum5 += v[jrow]*x[6*idx[jrow]+4];
954       sum6 += v[jrow]*x[6*idx[jrow]+5];
955       jrow++;
956     }
957     y[6*i]   += sum1;
958     y[6*i+1] += sum2;
959     y[6*i+2] += sum3;
960     y[6*i+3] += sum4;
961     y[6*i+4] += sum5;
962     y[6*i+5] += sum6;
963   }
964 
965   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
966   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
967   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
972 {
973   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
974   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
975   const PetscScalar *x,*v;
976   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
977   PetscErrorCode    ierr;
978   PetscInt          n,i;
979   const PetscInt    m = b->AIJ->rmap->n,*idx;
980 
981   PetscFunctionBegin;
982   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
983   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
984   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
985 
986   for (i=0; i<m; i++) {
987     idx    = a->j + a->i[i];
988     v      = a->a + a->i[i];
989     n      = a->i[i+1] - a->i[i];
990     alpha1 = x[6*i];
991     alpha2 = x[6*i+1];
992     alpha3 = x[6*i+2];
993     alpha4 = x[6*i+3];
994     alpha5 = x[6*i+4];
995     alpha6 = x[6*i+5];
996     while (n-->0) {
997       y[6*(*idx)]   += alpha1*(*v);
998       y[6*(*idx)+1] += alpha2*(*v);
999       y[6*(*idx)+2] += alpha3*(*v);
1000       y[6*(*idx)+3] += alpha4*(*v);
1001       y[6*(*idx)+4] += alpha5*(*v);
1002       y[6*(*idx)+5] += alpha6*(*v);
1003       idx++; v++;
1004     }
1005   }
1006   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1007   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1008   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 /* ------------------------------------------------------------------------------*/
1013 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1016   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1017   const PetscScalar *x,*v;
1018   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1019   PetscErrorCode    ierr;
1020   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1021   PetscInt          nonzerorow=0,n,i,jrow,j;
1022 
1023   PetscFunctionBegin;
1024   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1025   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1026   idx  = a->j;
1027   v    = a->a;
1028   ii   = a->i;
1029 
1030   for (i=0; i<m; i++) {
1031     jrow  = ii[i];
1032     n     = ii[i+1] - jrow;
1033     sum1  = 0.0;
1034     sum2  = 0.0;
1035     sum3  = 0.0;
1036     sum4  = 0.0;
1037     sum5  = 0.0;
1038     sum6  = 0.0;
1039     sum7  = 0.0;
1040 
1041     nonzerorow += (n>0);
1042     for (j=0; j<n; j++) {
1043       sum1 += v[jrow]*x[7*idx[jrow]];
1044       sum2 += v[jrow]*x[7*idx[jrow]+1];
1045       sum3 += v[jrow]*x[7*idx[jrow]+2];
1046       sum4 += v[jrow]*x[7*idx[jrow]+3];
1047       sum5 += v[jrow]*x[7*idx[jrow]+4];
1048       sum6 += v[jrow]*x[7*idx[jrow]+5];
1049       sum7 += v[jrow]*x[7*idx[jrow]+6];
1050       jrow++;
1051     }
1052     y[7*i]   = sum1;
1053     y[7*i+1] = sum2;
1054     y[7*i+2] = sum3;
1055     y[7*i+3] = sum4;
1056     y[7*i+4] = sum5;
1057     y[7*i+5] = sum6;
1058     y[7*i+6] = sum7;
1059   }
1060 
1061   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1062   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1063   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1070   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1071   const PetscScalar *x,*v;
1072   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1073   PetscErrorCode    ierr;
1074   const PetscInt    m = b->AIJ->rmap->n,*idx;
1075   PetscInt          n,i;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1079   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1080   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1081 
1082   for (i=0; i<m; i++) {
1083     idx    = a->j + a->i[i];
1084     v      = a->a + a->i[i];
1085     n      = a->i[i+1] - a->i[i];
1086     alpha1 = x[7*i];
1087     alpha2 = x[7*i+1];
1088     alpha3 = x[7*i+2];
1089     alpha4 = x[7*i+3];
1090     alpha5 = x[7*i+4];
1091     alpha6 = x[7*i+5];
1092     alpha7 = x[7*i+6];
1093     while (n-->0) {
1094       y[7*(*idx)]   += alpha1*(*v);
1095       y[7*(*idx)+1] += alpha2*(*v);
1096       y[7*(*idx)+2] += alpha3*(*v);
1097       y[7*(*idx)+3] += alpha4*(*v);
1098       y[7*(*idx)+4] += alpha5*(*v);
1099       y[7*(*idx)+5] += alpha6*(*v);
1100       y[7*(*idx)+6] += alpha7*(*v);
1101       idx++; v++;
1102     }
1103   }
1104   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1105   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1106   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1113   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1114   const PetscScalar *x,*v;
1115   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1116   PetscErrorCode    ierr;
1117   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1118   PetscInt          n,i,jrow,j;
1119 
1120   PetscFunctionBegin;
1121   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1122   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1123   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1124   idx  = a->j;
1125   v    = a->a;
1126   ii   = a->i;
1127 
1128   for (i=0; i<m; i++) {
1129     jrow = ii[i];
1130     n    = ii[i+1] - jrow;
1131     sum1 = 0.0;
1132     sum2 = 0.0;
1133     sum3 = 0.0;
1134     sum4 = 0.0;
1135     sum5 = 0.0;
1136     sum6 = 0.0;
1137     sum7 = 0.0;
1138     for (j=0; j<n; j++) {
1139       sum1 += v[jrow]*x[7*idx[jrow]];
1140       sum2 += v[jrow]*x[7*idx[jrow]+1];
1141       sum3 += v[jrow]*x[7*idx[jrow]+2];
1142       sum4 += v[jrow]*x[7*idx[jrow]+3];
1143       sum5 += v[jrow]*x[7*idx[jrow]+4];
1144       sum6 += v[jrow]*x[7*idx[jrow]+5];
1145       sum7 += v[jrow]*x[7*idx[jrow]+6];
1146       jrow++;
1147     }
1148     y[7*i]   += sum1;
1149     y[7*i+1] += sum2;
1150     y[7*i+2] += sum3;
1151     y[7*i+3] += sum4;
1152     y[7*i+4] += sum5;
1153     y[7*i+5] += sum6;
1154     y[7*i+6] += sum7;
1155   }
1156 
1157   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1158   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1159   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1164 {
1165   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1166   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1167   const PetscScalar *x,*v;
1168   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1169   PetscErrorCode    ierr;
1170   const PetscInt    m = b->AIJ->rmap->n,*idx;
1171   PetscInt          n,i;
1172 
1173   PetscFunctionBegin;
1174   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1175   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1176   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1177   for (i=0; i<m; i++) {
1178     idx    = a->j + a->i[i];
1179     v      = a->a + a->i[i];
1180     n      = a->i[i+1] - a->i[i];
1181     alpha1 = x[7*i];
1182     alpha2 = x[7*i+1];
1183     alpha3 = x[7*i+2];
1184     alpha4 = x[7*i+3];
1185     alpha5 = x[7*i+4];
1186     alpha6 = x[7*i+5];
1187     alpha7 = x[7*i+6];
1188     while (n-->0) {
1189       y[7*(*idx)]   += alpha1*(*v);
1190       y[7*(*idx)+1] += alpha2*(*v);
1191       y[7*(*idx)+2] += alpha3*(*v);
1192       y[7*(*idx)+3] += alpha4*(*v);
1193       y[7*(*idx)+4] += alpha5*(*v);
1194       y[7*(*idx)+5] += alpha6*(*v);
1195       y[7*(*idx)+6] += alpha7*(*v);
1196       idx++; v++;
1197     }
1198   }
1199   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1200   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1201   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1206 {
1207   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1208   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1209   const PetscScalar *x,*v;
1210   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1211   PetscErrorCode    ierr;
1212   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1213   PetscInt          nonzerorow=0,n,i,jrow,j;
1214 
1215   PetscFunctionBegin;
1216   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1217   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1218   idx  = a->j;
1219   v    = a->a;
1220   ii   = a->i;
1221 
1222   for (i=0; i<m; i++) {
1223     jrow  = ii[i];
1224     n     = ii[i+1] - jrow;
1225     sum1  = 0.0;
1226     sum2  = 0.0;
1227     sum3  = 0.0;
1228     sum4  = 0.0;
1229     sum5  = 0.0;
1230     sum6  = 0.0;
1231     sum7  = 0.0;
1232     sum8  = 0.0;
1233 
1234     nonzerorow += (n>0);
1235     for (j=0; j<n; j++) {
1236       sum1 += v[jrow]*x[8*idx[jrow]];
1237       sum2 += v[jrow]*x[8*idx[jrow]+1];
1238       sum3 += v[jrow]*x[8*idx[jrow]+2];
1239       sum4 += v[jrow]*x[8*idx[jrow]+3];
1240       sum5 += v[jrow]*x[8*idx[jrow]+4];
1241       sum6 += v[jrow]*x[8*idx[jrow]+5];
1242       sum7 += v[jrow]*x[8*idx[jrow]+6];
1243       sum8 += v[jrow]*x[8*idx[jrow]+7];
1244       jrow++;
1245     }
1246     y[8*i]   = sum1;
1247     y[8*i+1] = sum2;
1248     y[8*i+2] = sum3;
1249     y[8*i+3] = sum4;
1250     y[8*i+4] = sum5;
1251     y[8*i+5] = sum6;
1252     y[8*i+6] = sum7;
1253     y[8*i+7] = sum8;
1254   }
1255 
1256   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1257   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1258   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1263 {
1264   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1265   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1266   const PetscScalar *x,*v;
1267   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1268   PetscErrorCode    ierr;
1269   const PetscInt    m = b->AIJ->rmap->n,*idx;
1270   PetscInt          n,i;
1271 
1272   PetscFunctionBegin;
1273   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1274   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1275   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1276 
1277   for (i=0; i<m; i++) {
1278     idx    = a->j + a->i[i];
1279     v      = a->a + a->i[i];
1280     n      = a->i[i+1] - a->i[i];
1281     alpha1 = x[8*i];
1282     alpha2 = x[8*i+1];
1283     alpha3 = x[8*i+2];
1284     alpha4 = x[8*i+3];
1285     alpha5 = x[8*i+4];
1286     alpha6 = x[8*i+5];
1287     alpha7 = x[8*i+6];
1288     alpha8 = x[8*i+7];
1289     while (n-->0) {
1290       y[8*(*idx)]   += alpha1*(*v);
1291       y[8*(*idx)+1] += alpha2*(*v);
1292       y[8*(*idx)+2] += alpha3*(*v);
1293       y[8*(*idx)+3] += alpha4*(*v);
1294       y[8*(*idx)+4] += alpha5*(*v);
1295       y[8*(*idx)+5] += alpha6*(*v);
1296       y[8*(*idx)+6] += alpha7*(*v);
1297       y[8*(*idx)+7] += alpha8*(*v);
1298       idx++; v++;
1299     }
1300   }
1301   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1302   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1303   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1308 {
1309   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1310   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1311   const PetscScalar *x,*v;
1312   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1313   PetscErrorCode    ierr;
1314   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1315   PetscInt          n,i,jrow,j;
1316 
1317   PetscFunctionBegin;
1318   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1319   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1320   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1321   idx  = a->j;
1322   v    = a->a;
1323   ii   = a->i;
1324 
1325   for (i=0; i<m; i++) {
1326     jrow = ii[i];
1327     n    = ii[i+1] - jrow;
1328     sum1 = 0.0;
1329     sum2 = 0.0;
1330     sum3 = 0.0;
1331     sum4 = 0.0;
1332     sum5 = 0.0;
1333     sum6 = 0.0;
1334     sum7 = 0.0;
1335     sum8 = 0.0;
1336     for (j=0; j<n; j++) {
1337       sum1 += v[jrow]*x[8*idx[jrow]];
1338       sum2 += v[jrow]*x[8*idx[jrow]+1];
1339       sum3 += v[jrow]*x[8*idx[jrow]+2];
1340       sum4 += v[jrow]*x[8*idx[jrow]+3];
1341       sum5 += v[jrow]*x[8*idx[jrow]+4];
1342       sum6 += v[jrow]*x[8*idx[jrow]+5];
1343       sum7 += v[jrow]*x[8*idx[jrow]+6];
1344       sum8 += v[jrow]*x[8*idx[jrow]+7];
1345       jrow++;
1346     }
1347     y[8*i]   += sum1;
1348     y[8*i+1] += sum2;
1349     y[8*i+2] += sum3;
1350     y[8*i+3] += sum4;
1351     y[8*i+4] += sum5;
1352     y[8*i+5] += sum6;
1353     y[8*i+6] += sum7;
1354     y[8*i+7] += sum8;
1355   }
1356 
1357   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1359   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1364 {
1365   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1366   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1367   const PetscScalar *x,*v;
1368   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1369   PetscErrorCode    ierr;
1370   const PetscInt    m = b->AIJ->rmap->n,*idx;
1371   PetscInt          n,i;
1372 
1373   PetscFunctionBegin;
1374   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1375   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1376   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1377   for (i=0; i<m; i++) {
1378     idx    = a->j + a->i[i];
1379     v      = a->a + a->i[i];
1380     n      = a->i[i+1] - a->i[i];
1381     alpha1 = x[8*i];
1382     alpha2 = x[8*i+1];
1383     alpha3 = x[8*i+2];
1384     alpha4 = x[8*i+3];
1385     alpha5 = x[8*i+4];
1386     alpha6 = x[8*i+5];
1387     alpha7 = x[8*i+6];
1388     alpha8 = x[8*i+7];
1389     while (n-->0) {
1390       y[8*(*idx)]   += alpha1*(*v);
1391       y[8*(*idx)+1] += alpha2*(*v);
1392       y[8*(*idx)+2] += alpha3*(*v);
1393       y[8*(*idx)+3] += alpha4*(*v);
1394       y[8*(*idx)+4] += alpha5*(*v);
1395       y[8*(*idx)+5] += alpha6*(*v);
1396       y[8*(*idx)+6] += alpha7*(*v);
1397       y[8*(*idx)+7] += alpha8*(*v);
1398       idx++; v++;
1399     }
1400   }
1401   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1402   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1403   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 /* ------------------------------------------------------------------------------*/
1408 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1409 {
1410   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1411   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1412   const PetscScalar *x,*v;
1413   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1414   PetscErrorCode    ierr;
1415   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1416   PetscInt          nonzerorow=0,n,i,jrow,j;
1417 
1418   PetscFunctionBegin;
1419   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1420   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1421   idx  = a->j;
1422   v    = a->a;
1423   ii   = a->i;
1424 
1425   for (i=0; i<m; i++) {
1426     jrow  = ii[i];
1427     n     = ii[i+1] - jrow;
1428     sum1  = 0.0;
1429     sum2  = 0.0;
1430     sum3  = 0.0;
1431     sum4  = 0.0;
1432     sum5  = 0.0;
1433     sum6  = 0.0;
1434     sum7  = 0.0;
1435     sum8  = 0.0;
1436     sum9  = 0.0;
1437 
1438     nonzerorow += (n>0);
1439     for (j=0; j<n; j++) {
1440       sum1 += v[jrow]*x[9*idx[jrow]];
1441       sum2 += v[jrow]*x[9*idx[jrow]+1];
1442       sum3 += v[jrow]*x[9*idx[jrow]+2];
1443       sum4 += v[jrow]*x[9*idx[jrow]+3];
1444       sum5 += v[jrow]*x[9*idx[jrow]+4];
1445       sum6 += v[jrow]*x[9*idx[jrow]+5];
1446       sum7 += v[jrow]*x[9*idx[jrow]+6];
1447       sum8 += v[jrow]*x[9*idx[jrow]+7];
1448       sum9 += v[jrow]*x[9*idx[jrow]+8];
1449       jrow++;
1450     }
1451     y[9*i]   = sum1;
1452     y[9*i+1] = sum2;
1453     y[9*i+2] = sum3;
1454     y[9*i+3] = sum4;
1455     y[9*i+4] = sum5;
1456     y[9*i+5] = sum6;
1457     y[9*i+6] = sum7;
1458     y[9*i+7] = sum8;
1459     y[9*i+8] = sum9;
1460   }
1461 
1462   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1463   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1464   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 /* ------------------------------------------------------------------------------*/
1469 
1470 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1471 {
1472   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1473   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1474   const PetscScalar *x,*v;
1475   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1476   PetscErrorCode    ierr;
1477   const PetscInt    m = b->AIJ->rmap->n,*idx;
1478   PetscInt          n,i;
1479 
1480   PetscFunctionBegin;
1481   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1482   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1483   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1484 
1485   for (i=0; i<m; i++) {
1486     idx    = a->j + a->i[i];
1487     v      = a->a + a->i[i];
1488     n      = a->i[i+1] - a->i[i];
1489     alpha1 = x[9*i];
1490     alpha2 = x[9*i+1];
1491     alpha3 = x[9*i+2];
1492     alpha4 = x[9*i+3];
1493     alpha5 = x[9*i+4];
1494     alpha6 = x[9*i+5];
1495     alpha7 = x[9*i+6];
1496     alpha8 = x[9*i+7];
1497     alpha9 = x[9*i+8];
1498     while (n-->0) {
1499       y[9*(*idx)]   += alpha1*(*v);
1500       y[9*(*idx)+1] += alpha2*(*v);
1501       y[9*(*idx)+2] += alpha3*(*v);
1502       y[9*(*idx)+3] += alpha4*(*v);
1503       y[9*(*idx)+4] += alpha5*(*v);
1504       y[9*(*idx)+5] += alpha6*(*v);
1505       y[9*(*idx)+6] += alpha7*(*v);
1506       y[9*(*idx)+7] += alpha8*(*v);
1507       y[9*(*idx)+8] += alpha9*(*v);
1508       idx++; v++;
1509     }
1510   }
1511   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1512   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1513   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1518 {
1519   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1520   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1521   const PetscScalar *x,*v;
1522   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1523   PetscErrorCode    ierr;
1524   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1525   PetscInt          n,i,jrow,j;
1526 
1527   PetscFunctionBegin;
1528   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1529   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1530   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1531   idx  = a->j;
1532   v    = a->a;
1533   ii   = a->i;
1534 
1535   for (i=0; i<m; i++) {
1536     jrow = ii[i];
1537     n    = ii[i+1] - jrow;
1538     sum1 = 0.0;
1539     sum2 = 0.0;
1540     sum3 = 0.0;
1541     sum4 = 0.0;
1542     sum5 = 0.0;
1543     sum6 = 0.0;
1544     sum7 = 0.0;
1545     sum8 = 0.0;
1546     sum9 = 0.0;
1547     for (j=0; j<n; j++) {
1548       sum1 += v[jrow]*x[9*idx[jrow]];
1549       sum2 += v[jrow]*x[9*idx[jrow]+1];
1550       sum3 += v[jrow]*x[9*idx[jrow]+2];
1551       sum4 += v[jrow]*x[9*idx[jrow]+3];
1552       sum5 += v[jrow]*x[9*idx[jrow]+4];
1553       sum6 += v[jrow]*x[9*idx[jrow]+5];
1554       sum7 += v[jrow]*x[9*idx[jrow]+6];
1555       sum8 += v[jrow]*x[9*idx[jrow]+7];
1556       sum9 += v[jrow]*x[9*idx[jrow]+8];
1557       jrow++;
1558     }
1559     y[9*i]   += sum1;
1560     y[9*i+1] += sum2;
1561     y[9*i+2] += sum3;
1562     y[9*i+3] += sum4;
1563     y[9*i+4] += sum5;
1564     y[9*i+5] += sum6;
1565     y[9*i+6] += sum7;
1566     y[9*i+7] += sum8;
1567     y[9*i+8] += sum9;
1568   }
1569 
1570   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1571   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1572   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1577 {
1578   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1579   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1580   const PetscScalar *x,*v;
1581   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1582   PetscErrorCode    ierr;
1583   const PetscInt    m = b->AIJ->rmap->n,*idx;
1584   PetscInt          n,i;
1585 
1586   PetscFunctionBegin;
1587   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1588   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1589   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1590   for (i=0; i<m; i++) {
1591     idx    = a->j + a->i[i];
1592     v      = a->a + a->i[i];
1593     n      = a->i[i+1] - a->i[i];
1594     alpha1 = x[9*i];
1595     alpha2 = x[9*i+1];
1596     alpha3 = x[9*i+2];
1597     alpha4 = x[9*i+3];
1598     alpha5 = x[9*i+4];
1599     alpha6 = x[9*i+5];
1600     alpha7 = x[9*i+6];
1601     alpha8 = x[9*i+7];
1602     alpha9 = x[9*i+8];
1603     while (n-->0) {
1604       y[9*(*idx)]   += alpha1*(*v);
1605       y[9*(*idx)+1] += alpha2*(*v);
1606       y[9*(*idx)+2] += alpha3*(*v);
1607       y[9*(*idx)+3] += alpha4*(*v);
1608       y[9*(*idx)+4] += alpha5*(*v);
1609       y[9*(*idx)+5] += alpha6*(*v);
1610       y[9*(*idx)+6] += alpha7*(*v);
1611       y[9*(*idx)+7] += alpha8*(*v);
1612       y[9*(*idx)+8] += alpha9*(*v);
1613       idx++; v++;
1614     }
1615   }
1616   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1617   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1618   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1622 {
1623   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1624   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1625   const PetscScalar *x,*v;
1626   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1627   PetscErrorCode    ierr;
1628   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1629   PetscInt          nonzerorow=0,n,i,jrow,j;
1630 
1631   PetscFunctionBegin;
1632   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1633   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1634   idx  = a->j;
1635   v    = a->a;
1636   ii   = a->i;
1637 
1638   for (i=0; i<m; i++) {
1639     jrow  = ii[i];
1640     n     = ii[i+1] - jrow;
1641     sum1  = 0.0;
1642     sum2  = 0.0;
1643     sum3  = 0.0;
1644     sum4  = 0.0;
1645     sum5  = 0.0;
1646     sum6  = 0.0;
1647     sum7  = 0.0;
1648     sum8  = 0.0;
1649     sum9  = 0.0;
1650     sum10 = 0.0;
1651 
1652     nonzerorow += (n>0);
1653     for (j=0; j<n; j++) {
1654       sum1  += v[jrow]*x[10*idx[jrow]];
1655       sum2  += v[jrow]*x[10*idx[jrow]+1];
1656       sum3  += v[jrow]*x[10*idx[jrow]+2];
1657       sum4  += v[jrow]*x[10*idx[jrow]+3];
1658       sum5  += v[jrow]*x[10*idx[jrow]+4];
1659       sum6  += v[jrow]*x[10*idx[jrow]+5];
1660       sum7  += v[jrow]*x[10*idx[jrow]+6];
1661       sum8  += v[jrow]*x[10*idx[jrow]+7];
1662       sum9  += v[jrow]*x[10*idx[jrow]+8];
1663       sum10 += v[jrow]*x[10*idx[jrow]+9];
1664       jrow++;
1665     }
1666     y[10*i]   = sum1;
1667     y[10*i+1] = sum2;
1668     y[10*i+2] = sum3;
1669     y[10*i+3] = sum4;
1670     y[10*i+4] = sum5;
1671     y[10*i+5] = sum6;
1672     y[10*i+6] = sum7;
1673     y[10*i+7] = sum8;
1674     y[10*i+8] = sum9;
1675     y[10*i+9] = sum10;
1676   }
1677 
1678   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1679   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1680   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1685 {
1686   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1687   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1688   const PetscScalar *x,*v;
1689   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690   PetscErrorCode    ierr;
1691   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1692   PetscInt          n,i,jrow,j;
1693 
1694   PetscFunctionBegin;
1695   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1696   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1697   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1698   idx  = a->j;
1699   v    = a->a;
1700   ii   = a->i;
1701 
1702   for (i=0; i<m; i++) {
1703     jrow  = ii[i];
1704     n     = ii[i+1] - jrow;
1705     sum1  = 0.0;
1706     sum2  = 0.0;
1707     sum3  = 0.0;
1708     sum4  = 0.0;
1709     sum5  = 0.0;
1710     sum6  = 0.0;
1711     sum7  = 0.0;
1712     sum8  = 0.0;
1713     sum9  = 0.0;
1714     sum10 = 0.0;
1715     for (j=0; j<n; j++) {
1716       sum1  += v[jrow]*x[10*idx[jrow]];
1717       sum2  += v[jrow]*x[10*idx[jrow]+1];
1718       sum3  += v[jrow]*x[10*idx[jrow]+2];
1719       sum4  += v[jrow]*x[10*idx[jrow]+3];
1720       sum5  += v[jrow]*x[10*idx[jrow]+4];
1721       sum6  += v[jrow]*x[10*idx[jrow]+5];
1722       sum7  += v[jrow]*x[10*idx[jrow]+6];
1723       sum8  += v[jrow]*x[10*idx[jrow]+7];
1724       sum9  += v[jrow]*x[10*idx[jrow]+8];
1725       sum10 += v[jrow]*x[10*idx[jrow]+9];
1726       jrow++;
1727     }
1728     y[10*i]   += sum1;
1729     y[10*i+1] += sum2;
1730     y[10*i+2] += sum3;
1731     y[10*i+3] += sum4;
1732     y[10*i+4] += sum5;
1733     y[10*i+5] += sum6;
1734     y[10*i+6] += sum7;
1735     y[10*i+7] += sum8;
1736     y[10*i+8] += sum9;
1737     y[10*i+9] += sum10;
1738   }
1739 
1740   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1741   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1742   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1747 {
1748   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1749   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1750   const PetscScalar *x,*v;
1751   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1752   PetscErrorCode    ierr;
1753   const PetscInt    m = b->AIJ->rmap->n,*idx;
1754   PetscInt          n,i;
1755 
1756   PetscFunctionBegin;
1757   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1758   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1759   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1760 
1761   for (i=0; i<m; i++) {
1762     idx     = a->j + a->i[i];
1763     v       = a->a + a->i[i];
1764     n       = a->i[i+1] - a->i[i];
1765     alpha1  = x[10*i];
1766     alpha2  = x[10*i+1];
1767     alpha3  = x[10*i+2];
1768     alpha4  = x[10*i+3];
1769     alpha5  = x[10*i+4];
1770     alpha6  = x[10*i+5];
1771     alpha7  = x[10*i+6];
1772     alpha8  = x[10*i+7];
1773     alpha9  = x[10*i+8];
1774     alpha10 = x[10*i+9];
1775     while (n-->0) {
1776       y[10*(*idx)]   += alpha1*(*v);
1777       y[10*(*idx)+1] += alpha2*(*v);
1778       y[10*(*idx)+2] += alpha3*(*v);
1779       y[10*(*idx)+3] += alpha4*(*v);
1780       y[10*(*idx)+4] += alpha5*(*v);
1781       y[10*(*idx)+5] += alpha6*(*v);
1782       y[10*(*idx)+6] += alpha7*(*v);
1783       y[10*(*idx)+7] += alpha8*(*v);
1784       y[10*(*idx)+8] += alpha9*(*v);
1785       y[10*(*idx)+9] += alpha10*(*v);
1786       idx++; v++;
1787     }
1788   }
1789   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1790   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1791   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1796 {
1797   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1798   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1799   const PetscScalar *x,*v;
1800   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1801   PetscErrorCode    ierr;
1802   const PetscInt    m = b->AIJ->rmap->n,*idx;
1803   PetscInt          n,i;
1804 
1805   PetscFunctionBegin;
1806   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1807   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1808   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1809   for (i=0; i<m; i++) {
1810     idx     = a->j + a->i[i];
1811     v       = a->a + a->i[i];
1812     n       = a->i[i+1] - a->i[i];
1813     alpha1  = x[10*i];
1814     alpha2  = x[10*i+1];
1815     alpha3  = x[10*i+2];
1816     alpha4  = x[10*i+3];
1817     alpha5  = x[10*i+4];
1818     alpha6  = x[10*i+5];
1819     alpha7  = x[10*i+6];
1820     alpha8  = x[10*i+7];
1821     alpha9  = x[10*i+8];
1822     alpha10 = x[10*i+9];
1823     while (n-->0) {
1824       y[10*(*idx)]   += alpha1*(*v);
1825       y[10*(*idx)+1] += alpha2*(*v);
1826       y[10*(*idx)+2] += alpha3*(*v);
1827       y[10*(*idx)+3] += alpha4*(*v);
1828       y[10*(*idx)+4] += alpha5*(*v);
1829       y[10*(*idx)+5] += alpha6*(*v);
1830       y[10*(*idx)+6] += alpha7*(*v);
1831       y[10*(*idx)+7] += alpha8*(*v);
1832       y[10*(*idx)+8] += alpha9*(*v);
1833       y[10*(*idx)+9] += alpha10*(*v);
1834       idx++; v++;
1835     }
1836   }
1837   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1838   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1839   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 /*--------------------------------------------------------------------------------------------*/
1844 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1845 {
1846   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1847   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1848   const PetscScalar *x,*v;
1849   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1850   PetscErrorCode    ierr;
1851   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1852   PetscInt          nonzerorow=0,n,i,jrow,j;
1853 
1854   PetscFunctionBegin;
1855   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1856   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1857   idx  = a->j;
1858   v    = a->a;
1859   ii   = a->i;
1860 
1861   for (i=0; i<m; i++) {
1862     jrow  = ii[i];
1863     n     = ii[i+1] - jrow;
1864     sum1  = 0.0;
1865     sum2  = 0.0;
1866     sum3  = 0.0;
1867     sum4  = 0.0;
1868     sum5  = 0.0;
1869     sum6  = 0.0;
1870     sum7  = 0.0;
1871     sum8  = 0.0;
1872     sum9  = 0.0;
1873     sum10 = 0.0;
1874     sum11 = 0.0;
1875 
1876     nonzerorow += (n>0);
1877     for (j=0; j<n; j++) {
1878       sum1  += v[jrow]*x[11*idx[jrow]];
1879       sum2  += v[jrow]*x[11*idx[jrow]+1];
1880       sum3  += v[jrow]*x[11*idx[jrow]+2];
1881       sum4  += v[jrow]*x[11*idx[jrow]+3];
1882       sum5  += v[jrow]*x[11*idx[jrow]+4];
1883       sum6  += v[jrow]*x[11*idx[jrow]+5];
1884       sum7  += v[jrow]*x[11*idx[jrow]+6];
1885       sum8  += v[jrow]*x[11*idx[jrow]+7];
1886       sum9  += v[jrow]*x[11*idx[jrow]+8];
1887       sum10 += v[jrow]*x[11*idx[jrow]+9];
1888       sum11 += v[jrow]*x[11*idx[jrow]+10];
1889       jrow++;
1890     }
1891     y[11*i]    = sum1;
1892     y[11*i+1]  = sum2;
1893     y[11*i+2]  = sum3;
1894     y[11*i+3]  = sum4;
1895     y[11*i+4]  = sum5;
1896     y[11*i+5]  = sum6;
1897     y[11*i+6]  = sum7;
1898     y[11*i+7]  = sum8;
1899     y[11*i+8]  = sum9;
1900     y[11*i+9]  = sum10;
1901     y[11*i+10] = sum11;
1902   }
1903 
1904   ierr = PetscLogFlops(22.0*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1905   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1906   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1911 {
1912   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1913   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1914   const PetscScalar *x,*v;
1915   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1916   PetscErrorCode    ierr;
1917   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1918   PetscInt          n,i,jrow,j;
1919 
1920   PetscFunctionBegin;
1921   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1922   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1923   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1924   idx  = a->j;
1925   v    = a->a;
1926   ii   = a->i;
1927 
1928   for (i=0; i<m; i++) {
1929     jrow  = ii[i];
1930     n     = ii[i+1] - jrow;
1931     sum1  = 0.0;
1932     sum2  = 0.0;
1933     sum3  = 0.0;
1934     sum4  = 0.0;
1935     sum5  = 0.0;
1936     sum6  = 0.0;
1937     sum7  = 0.0;
1938     sum8  = 0.0;
1939     sum9  = 0.0;
1940     sum10 = 0.0;
1941     sum11 = 0.0;
1942     for (j=0; j<n; j++) {
1943       sum1  += v[jrow]*x[11*idx[jrow]];
1944       sum2  += v[jrow]*x[11*idx[jrow]+1];
1945       sum3  += v[jrow]*x[11*idx[jrow]+2];
1946       sum4  += v[jrow]*x[11*idx[jrow]+3];
1947       sum5  += v[jrow]*x[11*idx[jrow]+4];
1948       sum6  += v[jrow]*x[11*idx[jrow]+5];
1949       sum7  += v[jrow]*x[11*idx[jrow]+6];
1950       sum8  += v[jrow]*x[11*idx[jrow]+7];
1951       sum9  += v[jrow]*x[11*idx[jrow]+8];
1952       sum10 += v[jrow]*x[11*idx[jrow]+9];
1953       sum11 += v[jrow]*x[11*idx[jrow]+10];
1954       jrow++;
1955     }
1956     y[11*i]    += sum1;
1957     y[11*i+1]  += sum2;
1958     y[11*i+2]  += sum3;
1959     y[11*i+3]  += sum4;
1960     y[11*i+4]  += sum5;
1961     y[11*i+5]  += sum6;
1962     y[11*i+6]  += sum7;
1963     y[11*i+7]  += sum8;
1964     y[11*i+8]  += sum9;
1965     y[11*i+9]  += sum10;
1966     y[11*i+10] += sum11;
1967   }
1968 
1969   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
1970   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1971   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1976 {
1977   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1979   const PetscScalar *x,*v;
1980   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1981   PetscErrorCode    ierr;
1982   const PetscInt    m = b->AIJ->rmap->n,*idx;
1983   PetscInt          n,i;
1984 
1985   PetscFunctionBegin;
1986   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1987   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1988   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1989 
1990   for (i=0; i<m; i++) {
1991     idx     = a->j + a->i[i];
1992     v       = a->a + a->i[i];
1993     n       = a->i[i+1] - a->i[i];
1994     alpha1  = x[11*i];
1995     alpha2  = x[11*i+1];
1996     alpha3  = x[11*i+2];
1997     alpha4  = x[11*i+3];
1998     alpha5  = x[11*i+4];
1999     alpha6  = x[11*i+5];
2000     alpha7  = x[11*i+6];
2001     alpha8  = x[11*i+7];
2002     alpha9  = x[11*i+8];
2003     alpha10 = x[11*i+9];
2004     alpha11 = x[11*i+10];
2005     while (n-->0) {
2006       y[11*(*idx)]    += alpha1*(*v);
2007       y[11*(*idx)+1]  += alpha2*(*v);
2008       y[11*(*idx)+2]  += alpha3*(*v);
2009       y[11*(*idx)+3]  += alpha4*(*v);
2010       y[11*(*idx)+4]  += alpha5*(*v);
2011       y[11*(*idx)+5]  += alpha6*(*v);
2012       y[11*(*idx)+6]  += alpha7*(*v);
2013       y[11*(*idx)+7]  += alpha8*(*v);
2014       y[11*(*idx)+8]  += alpha9*(*v);
2015       y[11*(*idx)+9]  += alpha10*(*v);
2016       y[11*(*idx)+10] += alpha11*(*v);
2017       idx++; v++;
2018     }
2019   }
2020   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
2021   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2022   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2027 {
2028   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2029   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2030   const PetscScalar *x,*v;
2031   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2032   PetscErrorCode    ierr;
2033   const PetscInt    m = b->AIJ->rmap->n,*idx;
2034   PetscInt          n,i;
2035 
2036   PetscFunctionBegin;
2037   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2038   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2039   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2040   for (i=0; i<m; i++) {
2041     idx     = a->j + a->i[i];
2042     v       = a->a + a->i[i];
2043     n       = a->i[i+1] - a->i[i];
2044     alpha1  = x[11*i];
2045     alpha2  = x[11*i+1];
2046     alpha3  = x[11*i+2];
2047     alpha4  = x[11*i+3];
2048     alpha5  = x[11*i+4];
2049     alpha6  = x[11*i+5];
2050     alpha7  = x[11*i+6];
2051     alpha8  = x[11*i+7];
2052     alpha9  = x[11*i+8];
2053     alpha10 = x[11*i+9];
2054     alpha11 = x[11*i+10];
2055     while (n-->0) {
2056       y[11*(*idx)]    += alpha1*(*v);
2057       y[11*(*idx)+1]  += alpha2*(*v);
2058       y[11*(*idx)+2]  += alpha3*(*v);
2059       y[11*(*idx)+3]  += alpha4*(*v);
2060       y[11*(*idx)+4]  += alpha5*(*v);
2061       y[11*(*idx)+5]  += alpha6*(*v);
2062       y[11*(*idx)+6]  += alpha7*(*v);
2063       y[11*(*idx)+7]  += alpha8*(*v);
2064       y[11*(*idx)+8]  += alpha9*(*v);
2065       y[11*(*idx)+9]  += alpha10*(*v);
2066       y[11*(*idx)+10] += alpha11*(*v);
2067       idx++; v++;
2068     }
2069   }
2070   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
2071   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2072   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 /*--------------------------------------------------------------------------------------------*/
2077 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2078 {
2079   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2080   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2081   const PetscScalar *x,*v;
2082   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2083   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2084   PetscErrorCode    ierr;
2085   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2086   PetscInt          nonzerorow=0,n,i,jrow,j;
2087 
2088   PetscFunctionBegin;
2089   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2090   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2091   idx  = a->j;
2092   v    = a->a;
2093   ii   = a->i;
2094 
2095   for (i=0; i<m; i++) {
2096     jrow  = ii[i];
2097     n     = ii[i+1] - jrow;
2098     sum1  = 0.0;
2099     sum2  = 0.0;
2100     sum3  = 0.0;
2101     sum4  = 0.0;
2102     sum5  = 0.0;
2103     sum6  = 0.0;
2104     sum7  = 0.0;
2105     sum8  = 0.0;
2106     sum9  = 0.0;
2107     sum10 = 0.0;
2108     sum11 = 0.0;
2109     sum12 = 0.0;
2110     sum13 = 0.0;
2111     sum14 = 0.0;
2112     sum15 = 0.0;
2113     sum16 = 0.0;
2114 
2115     nonzerorow += (n>0);
2116     for (j=0; j<n; j++) {
2117       sum1  += v[jrow]*x[16*idx[jrow]];
2118       sum2  += v[jrow]*x[16*idx[jrow]+1];
2119       sum3  += v[jrow]*x[16*idx[jrow]+2];
2120       sum4  += v[jrow]*x[16*idx[jrow]+3];
2121       sum5  += v[jrow]*x[16*idx[jrow]+4];
2122       sum6  += v[jrow]*x[16*idx[jrow]+5];
2123       sum7  += v[jrow]*x[16*idx[jrow]+6];
2124       sum8  += v[jrow]*x[16*idx[jrow]+7];
2125       sum9  += v[jrow]*x[16*idx[jrow]+8];
2126       sum10 += v[jrow]*x[16*idx[jrow]+9];
2127       sum11 += v[jrow]*x[16*idx[jrow]+10];
2128       sum12 += v[jrow]*x[16*idx[jrow]+11];
2129       sum13 += v[jrow]*x[16*idx[jrow]+12];
2130       sum14 += v[jrow]*x[16*idx[jrow]+13];
2131       sum15 += v[jrow]*x[16*idx[jrow]+14];
2132       sum16 += v[jrow]*x[16*idx[jrow]+15];
2133       jrow++;
2134     }
2135     y[16*i]    = sum1;
2136     y[16*i+1]  = sum2;
2137     y[16*i+2]  = sum3;
2138     y[16*i+3]  = sum4;
2139     y[16*i+4]  = sum5;
2140     y[16*i+5]  = sum6;
2141     y[16*i+6]  = sum7;
2142     y[16*i+7]  = sum8;
2143     y[16*i+8]  = sum9;
2144     y[16*i+9]  = sum10;
2145     y[16*i+10] = sum11;
2146     y[16*i+11] = sum12;
2147     y[16*i+12] = sum13;
2148     y[16*i+13] = sum14;
2149     y[16*i+14] = sum15;
2150     y[16*i+15] = sum16;
2151   }
2152 
2153   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2154   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2155   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2160 {
2161   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2162   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2163   const PetscScalar *x,*v;
2164   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2165   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2166   PetscErrorCode    ierr;
2167   const PetscInt    m = b->AIJ->rmap->n,*idx;
2168   PetscInt          n,i;
2169 
2170   PetscFunctionBegin;
2171   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2172   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2173   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2174 
2175   for (i=0; i<m; i++) {
2176     idx     = a->j + a->i[i];
2177     v       = a->a + a->i[i];
2178     n       = a->i[i+1] - a->i[i];
2179     alpha1  = x[16*i];
2180     alpha2  = x[16*i+1];
2181     alpha3  = x[16*i+2];
2182     alpha4  = x[16*i+3];
2183     alpha5  = x[16*i+4];
2184     alpha6  = x[16*i+5];
2185     alpha7  = x[16*i+6];
2186     alpha8  = x[16*i+7];
2187     alpha9  = x[16*i+8];
2188     alpha10 = x[16*i+9];
2189     alpha11 = x[16*i+10];
2190     alpha12 = x[16*i+11];
2191     alpha13 = x[16*i+12];
2192     alpha14 = x[16*i+13];
2193     alpha15 = x[16*i+14];
2194     alpha16 = x[16*i+15];
2195     while (n-->0) {
2196       y[16*(*idx)]    += alpha1*(*v);
2197       y[16*(*idx)+1]  += alpha2*(*v);
2198       y[16*(*idx)+2]  += alpha3*(*v);
2199       y[16*(*idx)+3]  += alpha4*(*v);
2200       y[16*(*idx)+4]  += alpha5*(*v);
2201       y[16*(*idx)+5]  += alpha6*(*v);
2202       y[16*(*idx)+6]  += alpha7*(*v);
2203       y[16*(*idx)+7]  += alpha8*(*v);
2204       y[16*(*idx)+8]  += alpha9*(*v);
2205       y[16*(*idx)+9]  += alpha10*(*v);
2206       y[16*(*idx)+10] += alpha11*(*v);
2207       y[16*(*idx)+11] += alpha12*(*v);
2208       y[16*(*idx)+12] += alpha13*(*v);
2209       y[16*(*idx)+13] += alpha14*(*v);
2210       y[16*(*idx)+14] += alpha15*(*v);
2211       y[16*(*idx)+15] += alpha16*(*v);
2212       idx++; v++;
2213     }
2214   }
2215   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2216   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2217   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2222 {
2223   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2224   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2225   const PetscScalar *x,*v;
2226   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2227   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2228   PetscErrorCode    ierr;
2229   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2230   PetscInt          n,i,jrow,j;
2231 
2232   PetscFunctionBegin;
2233   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2234   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2235   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2236   idx  = a->j;
2237   v    = a->a;
2238   ii   = a->i;
2239 
2240   for (i=0; i<m; i++) {
2241     jrow  = ii[i];
2242     n     = ii[i+1] - jrow;
2243     sum1  = 0.0;
2244     sum2  = 0.0;
2245     sum3  = 0.0;
2246     sum4  = 0.0;
2247     sum5  = 0.0;
2248     sum6  = 0.0;
2249     sum7  = 0.0;
2250     sum8  = 0.0;
2251     sum9  = 0.0;
2252     sum10 = 0.0;
2253     sum11 = 0.0;
2254     sum12 = 0.0;
2255     sum13 = 0.0;
2256     sum14 = 0.0;
2257     sum15 = 0.0;
2258     sum16 = 0.0;
2259     for (j=0; j<n; j++) {
2260       sum1  += v[jrow]*x[16*idx[jrow]];
2261       sum2  += v[jrow]*x[16*idx[jrow]+1];
2262       sum3  += v[jrow]*x[16*idx[jrow]+2];
2263       sum4  += v[jrow]*x[16*idx[jrow]+3];
2264       sum5  += v[jrow]*x[16*idx[jrow]+4];
2265       sum6  += v[jrow]*x[16*idx[jrow]+5];
2266       sum7  += v[jrow]*x[16*idx[jrow]+6];
2267       sum8  += v[jrow]*x[16*idx[jrow]+7];
2268       sum9  += v[jrow]*x[16*idx[jrow]+8];
2269       sum10 += v[jrow]*x[16*idx[jrow]+9];
2270       sum11 += v[jrow]*x[16*idx[jrow]+10];
2271       sum12 += v[jrow]*x[16*idx[jrow]+11];
2272       sum13 += v[jrow]*x[16*idx[jrow]+12];
2273       sum14 += v[jrow]*x[16*idx[jrow]+13];
2274       sum15 += v[jrow]*x[16*idx[jrow]+14];
2275       sum16 += v[jrow]*x[16*idx[jrow]+15];
2276       jrow++;
2277     }
2278     y[16*i]    += sum1;
2279     y[16*i+1]  += sum2;
2280     y[16*i+2]  += sum3;
2281     y[16*i+3]  += sum4;
2282     y[16*i+4]  += sum5;
2283     y[16*i+5]  += sum6;
2284     y[16*i+6]  += sum7;
2285     y[16*i+7]  += sum8;
2286     y[16*i+8]  += sum9;
2287     y[16*i+9]  += sum10;
2288     y[16*i+10] += sum11;
2289     y[16*i+11] += sum12;
2290     y[16*i+12] += sum13;
2291     y[16*i+13] += sum14;
2292     y[16*i+14] += sum15;
2293     y[16*i+15] += sum16;
2294   }
2295 
2296   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2297   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2303 {
2304   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2305   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2306   const PetscScalar *x,*v;
2307   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2308   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2309   PetscErrorCode    ierr;
2310   const PetscInt    m = b->AIJ->rmap->n,*idx;
2311   PetscInt          n,i;
2312 
2313   PetscFunctionBegin;
2314   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2315   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2316   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2317   for (i=0; i<m; i++) {
2318     idx     = a->j + a->i[i];
2319     v       = a->a + a->i[i];
2320     n       = a->i[i+1] - a->i[i];
2321     alpha1  = x[16*i];
2322     alpha2  = x[16*i+1];
2323     alpha3  = x[16*i+2];
2324     alpha4  = x[16*i+3];
2325     alpha5  = x[16*i+4];
2326     alpha6  = x[16*i+5];
2327     alpha7  = x[16*i+6];
2328     alpha8  = x[16*i+7];
2329     alpha9  = x[16*i+8];
2330     alpha10 = x[16*i+9];
2331     alpha11 = x[16*i+10];
2332     alpha12 = x[16*i+11];
2333     alpha13 = x[16*i+12];
2334     alpha14 = x[16*i+13];
2335     alpha15 = x[16*i+14];
2336     alpha16 = x[16*i+15];
2337     while (n-->0) {
2338       y[16*(*idx)]    += alpha1*(*v);
2339       y[16*(*idx)+1]  += alpha2*(*v);
2340       y[16*(*idx)+2]  += alpha3*(*v);
2341       y[16*(*idx)+3]  += alpha4*(*v);
2342       y[16*(*idx)+4]  += alpha5*(*v);
2343       y[16*(*idx)+5]  += alpha6*(*v);
2344       y[16*(*idx)+6]  += alpha7*(*v);
2345       y[16*(*idx)+7]  += alpha8*(*v);
2346       y[16*(*idx)+8]  += alpha9*(*v);
2347       y[16*(*idx)+9]  += alpha10*(*v);
2348       y[16*(*idx)+10] += alpha11*(*v);
2349       y[16*(*idx)+11] += alpha12*(*v);
2350       y[16*(*idx)+12] += alpha13*(*v);
2351       y[16*(*idx)+13] += alpha14*(*v);
2352       y[16*(*idx)+14] += alpha15*(*v);
2353       y[16*(*idx)+15] += alpha16*(*v);
2354       idx++; v++;
2355     }
2356   }
2357   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2359   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 /*--------------------------------------------------------------------------------------------*/
2364 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2365 {
2366   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2367   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2368   const PetscScalar *x,*v;
2369   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2370   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2371   PetscErrorCode    ierr;
2372   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2373   PetscInt          nonzerorow=0,n,i,jrow,j;
2374 
2375   PetscFunctionBegin;
2376   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2377   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2378   idx  = a->j;
2379   v    = a->a;
2380   ii   = a->i;
2381 
2382   for (i=0; i<m; i++) {
2383     jrow  = ii[i];
2384     n     = ii[i+1] - jrow;
2385     sum1  = 0.0;
2386     sum2  = 0.0;
2387     sum3  = 0.0;
2388     sum4  = 0.0;
2389     sum5  = 0.0;
2390     sum6  = 0.0;
2391     sum7  = 0.0;
2392     sum8  = 0.0;
2393     sum9  = 0.0;
2394     sum10 = 0.0;
2395     sum11 = 0.0;
2396     sum12 = 0.0;
2397     sum13 = 0.0;
2398     sum14 = 0.0;
2399     sum15 = 0.0;
2400     sum16 = 0.0;
2401     sum17 = 0.0;
2402     sum18 = 0.0;
2403 
2404     nonzerorow += (n>0);
2405     for (j=0; j<n; j++) {
2406       sum1  += v[jrow]*x[18*idx[jrow]];
2407       sum2  += v[jrow]*x[18*idx[jrow]+1];
2408       sum3  += v[jrow]*x[18*idx[jrow]+2];
2409       sum4  += v[jrow]*x[18*idx[jrow]+3];
2410       sum5  += v[jrow]*x[18*idx[jrow]+4];
2411       sum6  += v[jrow]*x[18*idx[jrow]+5];
2412       sum7  += v[jrow]*x[18*idx[jrow]+6];
2413       sum8  += v[jrow]*x[18*idx[jrow]+7];
2414       sum9  += v[jrow]*x[18*idx[jrow]+8];
2415       sum10 += v[jrow]*x[18*idx[jrow]+9];
2416       sum11 += v[jrow]*x[18*idx[jrow]+10];
2417       sum12 += v[jrow]*x[18*idx[jrow]+11];
2418       sum13 += v[jrow]*x[18*idx[jrow]+12];
2419       sum14 += v[jrow]*x[18*idx[jrow]+13];
2420       sum15 += v[jrow]*x[18*idx[jrow]+14];
2421       sum16 += v[jrow]*x[18*idx[jrow]+15];
2422       sum17 += v[jrow]*x[18*idx[jrow]+16];
2423       sum18 += v[jrow]*x[18*idx[jrow]+17];
2424       jrow++;
2425     }
2426     y[18*i]    = sum1;
2427     y[18*i+1]  = sum2;
2428     y[18*i+2]  = sum3;
2429     y[18*i+3]  = sum4;
2430     y[18*i+4]  = sum5;
2431     y[18*i+5]  = sum6;
2432     y[18*i+6]  = sum7;
2433     y[18*i+7]  = sum8;
2434     y[18*i+8]  = sum9;
2435     y[18*i+9]  = sum10;
2436     y[18*i+10] = sum11;
2437     y[18*i+11] = sum12;
2438     y[18*i+12] = sum13;
2439     y[18*i+13] = sum14;
2440     y[18*i+14] = sum15;
2441     y[18*i+15] = sum16;
2442     y[18*i+16] = sum17;
2443     y[18*i+17] = sum18;
2444   }
2445 
2446   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2447   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2453 {
2454   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2455   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2456   const PetscScalar *x,*v;
2457   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2458   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2459   PetscErrorCode    ierr;
2460   const PetscInt    m = b->AIJ->rmap->n,*idx;
2461   PetscInt          n,i;
2462 
2463   PetscFunctionBegin;
2464   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2465   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2466   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2467 
2468   for (i=0; i<m; i++) {
2469     idx     = a->j + a->i[i];
2470     v       = a->a + a->i[i];
2471     n       = a->i[i+1] - a->i[i];
2472     alpha1  = x[18*i];
2473     alpha2  = x[18*i+1];
2474     alpha3  = x[18*i+2];
2475     alpha4  = x[18*i+3];
2476     alpha5  = x[18*i+4];
2477     alpha6  = x[18*i+5];
2478     alpha7  = x[18*i+6];
2479     alpha8  = x[18*i+7];
2480     alpha9  = x[18*i+8];
2481     alpha10 = x[18*i+9];
2482     alpha11 = x[18*i+10];
2483     alpha12 = x[18*i+11];
2484     alpha13 = x[18*i+12];
2485     alpha14 = x[18*i+13];
2486     alpha15 = x[18*i+14];
2487     alpha16 = x[18*i+15];
2488     alpha17 = x[18*i+16];
2489     alpha18 = x[18*i+17];
2490     while (n-->0) {
2491       y[18*(*idx)]    += alpha1*(*v);
2492       y[18*(*idx)+1]  += alpha2*(*v);
2493       y[18*(*idx)+2]  += alpha3*(*v);
2494       y[18*(*idx)+3]  += alpha4*(*v);
2495       y[18*(*idx)+4]  += alpha5*(*v);
2496       y[18*(*idx)+5]  += alpha6*(*v);
2497       y[18*(*idx)+6]  += alpha7*(*v);
2498       y[18*(*idx)+7]  += alpha8*(*v);
2499       y[18*(*idx)+8]  += alpha9*(*v);
2500       y[18*(*idx)+9]  += alpha10*(*v);
2501       y[18*(*idx)+10] += alpha11*(*v);
2502       y[18*(*idx)+11] += alpha12*(*v);
2503       y[18*(*idx)+12] += alpha13*(*v);
2504       y[18*(*idx)+13] += alpha14*(*v);
2505       y[18*(*idx)+14] += alpha15*(*v);
2506       y[18*(*idx)+15] += alpha16*(*v);
2507       y[18*(*idx)+16] += alpha17*(*v);
2508       y[18*(*idx)+17] += alpha18*(*v);
2509       idx++; v++;
2510     }
2511   }
2512   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2513   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2514   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2519 {
2520   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2521   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2522   const PetscScalar *x,*v;
2523   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2524   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2525   PetscErrorCode    ierr;
2526   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2527   PetscInt          n,i,jrow,j;
2528 
2529   PetscFunctionBegin;
2530   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2531   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2532   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2533   idx  = a->j;
2534   v    = a->a;
2535   ii   = a->i;
2536 
2537   for (i=0; i<m; i++) {
2538     jrow  = ii[i];
2539     n     = ii[i+1] - jrow;
2540     sum1  = 0.0;
2541     sum2  = 0.0;
2542     sum3  = 0.0;
2543     sum4  = 0.0;
2544     sum5  = 0.0;
2545     sum6  = 0.0;
2546     sum7  = 0.0;
2547     sum8  = 0.0;
2548     sum9  = 0.0;
2549     sum10 = 0.0;
2550     sum11 = 0.0;
2551     sum12 = 0.0;
2552     sum13 = 0.0;
2553     sum14 = 0.0;
2554     sum15 = 0.0;
2555     sum16 = 0.0;
2556     sum17 = 0.0;
2557     sum18 = 0.0;
2558     for (j=0; j<n; j++) {
2559       sum1  += v[jrow]*x[18*idx[jrow]];
2560       sum2  += v[jrow]*x[18*idx[jrow]+1];
2561       sum3  += v[jrow]*x[18*idx[jrow]+2];
2562       sum4  += v[jrow]*x[18*idx[jrow]+3];
2563       sum5  += v[jrow]*x[18*idx[jrow]+4];
2564       sum6  += v[jrow]*x[18*idx[jrow]+5];
2565       sum7  += v[jrow]*x[18*idx[jrow]+6];
2566       sum8  += v[jrow]*x[18*idx[jrow]+7];
2567       sum9  += v[jrow]*x[18*idx[jrow]+8];
2568       sum10 += v[jrow]*x[18*idx[jrow]+9];
2569       sum11 += v[jrow]*x[18*idx[jrow]+10];
2570       sum12 += v[jrow]*x[18*idx[jrow]+11];
2571       sum13 += v[jrow]*x[18*idx[jrow]+12];
2572       sum14 += v[jrow]*x[18*idx[jrow]+13];
2573       sum15 += v[jrow]*x[18*idx[jrow]+14];
2574       sum16 += v[jrow]*x[18*idx[jrow]+15];
2575       sum17 += v[jrow]*x[18*idx[jrow]+16];
2576       sum18 += v[jrow]*x[18*idx[jrow]+17];
2577       jrow++;
2578     }
2579     y[18*i]    += sum1;
2580     y[18*i+1]  += sum2;
2581     y[18*i+2]  += sum3;
2582     y[18*i+3]  += sum4;
2583     y[18*i+4]  += sum5;
2584     y[18*i+5]  += sum6;
2585     y[18*i+6]  += sum7;
2586     y[18*i+7]  += sum8;
2587     y[18*i+8]  += sum9;
2588     y[18*i+9]  += sum10;
2589     y[18*i+10] += sum11;
2590     y[18*i+11] += sum12;
2591     y[18*i+12] += sum13;
2592     y[18*i+13] += sum14;
2593     y[18*i+14] += sum15;
2594     y[18*i+15] += sum16;
2595     y[18*i+16] += sum17;
2596     y[18*i+17] += sum18;
2597   }
2598 
2599   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2600   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2601   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2606 {
2607   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2608   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2609   const PetscScalar *x,*v;
2610   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2611   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2612   PetscErrorCode    ierr;
2613   const PetscInt    m = b->AIJ->rmap->n,*idx;
2614   PetscInt          n,i;
2615 
2616   PetscFunctionBegin;
2617   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2618   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2619   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2620   for (i=0; i<m; i++) {
2621     idx     = a->j + a->i[i];
2622     v       = a->a + a->i[i];
2623     n       = a->i[i+1] - a->i[i];
2624     alpha1  = x[18*i];
2625     alpha2  = x[18*i+1];
2626     alpha3  = x[18*i+2];
2627     alpha4  = x[18*i+3];
2628     alpha5  = x[18*i+4];
2629     alpha6  = x[18*i+5];
2630     alpha7  = x[18*i+6];
2631     alpha8  = x[18*i+7];
2632     alpha9  = x[18*i+8];
2633     alpha10 = x[18*i+9];
2634     alpha11 = x[18*i+10];
2635     alpha12 = x[18*i+11];
2636     alpha13 = x[18*i+12];
2637     alpha14 = x[18*i+13];
2638     alpha15 = x[18*i+14];
2639     alpha16 = x[18*i+15];
2640     alpha17 = x[18*i+16];
2641     alpha18 = x[18*i+17];
2642     while (n-->0) {
2643       y[18*(*idx)]    += alpha1*(*v);
2644       y[18*(*idx)+1]  += alpha2*(*v);
2645       y[18*(*idx)+2]  += alpha3*(*v);
2646       y[18*(*idx)+3]  += alpha4*(*v);
2647       y[18*(*idx)+4]  += alpha5*(*v);
2648       y[18*(*idx)+5]  += alpha6*(*v);
2649       y[18*(*idx)+6]  += alpha7*(*v);
2650       y[18*(*idx)+7]  += alpha8*(*v);
2651       y[18*(*idx)+8]  += alpha9*(*v);
2652       y[18*(*idx)+9]  += alpha10*(*v);
2653       y[18*(*idx)+10] += alpha11*(*v);
2654       y[18*(*idx)+11] += alpha12*(*v);
2655       y[18*(*idx)+12] += alpha13*(*v);
2656       y[18*(*idx)+13] += alpha14*(*v);
2657       y[18*(*idx)+14] += alpha15*(*v);
2658       y[18*(*idx)+15] += alpha16*(*v);
2659       y[18*(*idx)+16] += alpha17*(*v);
2660       y[18*(*idx)+17] += alpha18*(*v);
2661       idx++; v++;
2662     }
2663   }
2664   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2665   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2666   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2671 {
2672   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2673   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2674   const PetscScalar *x,*v;
2675   PetscScalar       *y,*sums;
2676   PetscErrorCode    ierr;
2677   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2678   PetscInt          n,i,jrow,j,dof = b->dof,k;
2679 
2680   PetscFunctionBegin;
2681   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2682   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2683   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2684   idx  = a->j;
2685   v    = a->a;
2686   ii   = a->i;
2687 
2688   for (i=0; i<m; i++) {
2689     jrow = ii[i];
2690     n    = ii[i+1] - jrow;
2691     sums = y + dof*i;
2692     for (j=0; j<n; j++) {
2693       for (k=0; k<dof; k++) {
2694         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2695       }
2696       jrow++;
2697     }
2698   }
2699 
2700   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2701   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2702   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2707 {
2708   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2709   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2710   const PetscScalar *x,*v;
2711   PetscScalar       *y,*sums;
2712   PetscErrorCode    ierr;
2713   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2714   PetscInt          n,i,jrow,j,dof = b->dof,k;
2715 
2716   PetscFunctionBegin;
2717   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2718   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2719   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2720   idx  = a->j;
2721   v    = a->a;
2722   ii   = a->i;
2723 
2724   for (i=0; i<m; i++) {
2725     jrow = ii[i];
2726     n    = ii[i+1] - jrow;
2727     sums = y + dof*i;
2728     for (j=0; j<n; j++) {
2729       for (k=0; k<dof; k++) {
2730         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2731       }
2732       jrow++;
2733     }
2734   }
2735 
2736   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2737   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2738   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2743 {
2744   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2745   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2746   const PetscScalar *x,*v,*alpha;
2747   PetscScalar       *y;
2748   PetscErrorCode    ierr;
2749   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2750   PetscInt          n,i,k;
2751 
2752   PetscFunctionBegin;
2753   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2754   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2755   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2756   for (i=0; i<m; i++) {
2757     idx   = a->j + a->i[i];
2758     v     = a->a + a->i[i];
2759     n     = a->i[i+1] - a->i[i];
2760     alpha = x + dof*i;
2761     while (n-->0) {
2762       for (k=0; k<dof; k++) {
2763         y[dof*(*idx)+k] += alpha[k]*(*v);
2764       }
2765       idx++; v++;
2766     }
2767   }
2768   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2769   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2770   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2775 {
2776   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2777   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2778   const PetscScalar *x,*v,*alpha;
2779   PetscScalar       *y;
2780   PetscErrorCode    ierr;
2781   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2782   PetscInt          n,i,k;
2783 
2784   PetscFunctionBegin;
2785   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2786   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2787   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2788   for (i=0; i<m; i++) {
2789     idx   = a->j + a->i[i];
2790     v     = a->a + a->i[i];
2791     n     = a->i[i+1] - a->i[i];
2792     alpha = x + dof*i;
2793     while (n-->0) {
2794       for (k=0; k<dof; k++) {
2795         y[dof*(*idx)+k] += alpha[k]*(*v);
2796       }
2797       idx++; v++;
2798     }
2799   }
2800   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2801   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2802   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 /*===================================================================================*/
2807 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2808 {
2809   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   /* start the scatter */
2814   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2815   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2816   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2817   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2822 {
2823   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2824   PetscErrorCode ierr;
2825 
2826   PetscFunctionBegin;
2827   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2828   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2829   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2830   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2835 {
2836   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2837   PetscErrorCode ierr;
2838 
2839   PetscFunctionBegin;
2840   /* start the scatter */
2841   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2842   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2843   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2844   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2849 {
2850   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2851   PetscErrorCode ierr;
2852 
2853   PetscFunctionBegin;
2854   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2855   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2856   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2857   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 /* ----------------------------------------------------------------*/
2862 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2863 {
2864   Mat_Product    *product = C->product;
2865 
2866   PetscFunctionBegin;
2867   if (product->type == MATPRODUCT_PtAP) {
2868     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2869   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2874 {
2875   PetscErrorCode ierr;
2876   Mat_Product    *product = C->product;
2877   PetscBool      flg = PETSC_FALSE;
2878   Mat            A=product->A,P=product->B;
2879   PetscInt       alg=1; /* set default algorithm */
2880 #if !defined(PETSC_HAVE_HYPRE)
2881   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2882   PetscInt       nalg=4;
2883 #else
2884   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2885   PetscInt       nalg=5;
2886 #endif
2887 
2888   PetscFunctionBegin;
2889   PetscCheckFalse(product->type != MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2890 
2891   /* PtAP */
2892   /* Check matrix local sizes */
2893   PetscCheckFalse(A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2894   PetscCheckFalse(A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2895 
2896   /* Set the default algorithm */
2897   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
2898   if (flg) {
2899     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2900   }
2901 
2902   /* Get runtime option */
2903   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
2904   ierr = PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2905   if (flg) {
2906     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2907   }
2908   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2909 
2910   ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr);
2911   if (flg) {
2912     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2913     PetscFunctionReturn(0);
2914   }
2915 
2916   ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
2917   if (flg) {
2918     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2919     PetscFunctionReturn(0);
2920   }
2921 
2922   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2923   ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr);
2924   ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr);
2925   ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 /* ----------------------------------------------------------------*/
2930 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2931 {
2932   PetscErrorCode     ierr;
2933   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2934   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2935   Mat                P         =pp->AIJ;
2936   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2937   PetscInt           *pti,*ptj,*ptJ;
2938   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2939   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2940   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2941   MatScalar          *ca;
2942   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2943 
2944   PetscFunctionBegin;
2945   /* Get ij structure of P^T */
2946   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2947 
2948   cn = pn*ppdof;
2949   /* Allocate ci array, arrays for fill computation and */
2950   /* free space for accumulating nonzero column info */
2951   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2952   ci[0] = 0;
2953 
2954   /* Work arrays for rows of P^T*A */
2955   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2956   ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr);
2957   ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr);
2958 
2959   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2960   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2961   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2962   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2963   current_space = free_space;
2964 
2965   /* Determine symbolic info for each row of C: */
2966   for (i=0; i<pn; i++) {
2967     ptnzi = pti[i+1] - pti[i];
2968     ptJ   = ptj + pti[i];
2969     for (dof=0; dof<ppdof; dof++) {
2970       ptanzi = 0;
2971       /* Determine symbolic row of PtA: */
2972       for (j=0; j<ptnzi; j++) {
2973         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2974         arow = ptJ[j]*ppdof + dof;
2975         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2976         anzj = ai[arow+1] - ai[arow];
2977         ajj  = aj + ai[arow];
2978         for (k=0; k<anzj; k++) {
2979           if (!ptadenserow[ajj[k]]) {
2980             ptadenserow[ajj[k]]    = -1;
2981             ptasparserow[ptanzi++] = ajj[k];
2982           }
2983         }
2984       }
2985       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2986       ptaj = ptasparserow;
2987       cnzi = 0;
2988       for (j=0; j<ptanzi; j++) {
2989         /* Get offset within block of P */
2990         pshift = *ptaj%ppdof;
2991         /* Get block row of P */
2992         prow = (*ptaj++)/ppdof; /* integer division */
2993         /* P has same number of nonzeros per row as the compressed form */
2994         pnzj = pi[prow+1] - pi[prow];
2995         pjj  = pj + pi[prow];
2996         for (k=0;k<pnzj;k++) {
2997           /* Locations in C are shifted by the offset within the block */
2998           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2999           if (!denserow[pjj[k]*ppdof+pshift]) {
3000             denserow[pjj[k]*ppdof+pshift] = -1;
3001             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3002           }
3003         }
3004       }
3005 
3006       /* sort sparserow */
3007       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3008 
3009       /* If free space is not available, make more free space */
3010       /* Double the amount of total space in the list */
3011       if (current_space->local_remaining<cnzi) {
3012         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
3013       }
3014 
3015       /* Copy data into free space, and zero out denserows */
3016       ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr);
3017 
3018       current_space->array           += cnzi;
3019       current_space->local_used      += cnzi;
3020       current_space->local_remaining -= cnzi;
3021 
3022       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3023       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3024 
3025       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3026       /*        For now, we will recompute what is needed. */
3027       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3028     }
3029   }
3030   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3031   /* Allocate space for cj, initialize cj, and */
3032   /* destroy list of free space and other temporary array(s) */
3033   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3034   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3035   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3036 
3037   /* Allocate space for ca */
3038   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
3039 
3040   /* put together the new matrix */
3041   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr);
3042   ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr);
3043 
3044   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3045   /* Since these are PETSc arrays, change flags to free them as necessary. */
3046   c          = (Mat_SeqAIJ*)(C->data);
3047   c->free_a  = PETSC_TRUE;
3048   c->free_ij = PETSC_TRUE;
3049   c->nonew   = 0;
3050 
3051   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3052   C->ops->productnumeric = MatProductNumeric_PtAP;
3053 
3054   /* Clean up. */
3055   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3056   PetscFunctionReturn(0);
3057 }
3058 
3059 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3060 {
3061   /* This routine requires testing -- first draft only */
3062   PetscErrorCode  ierr;
3063   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3064   Mat             P  =pp->AIJ;
3065   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3066   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3067   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3068   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3069   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3070   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3071   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3072   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3073   MatScalar       *ca=c->a,*caj,*apa;
3074 
3075   PetscFunctionBegin;
3076   /* Allocate temporary array for storage of one row of A*P */
3077   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3078 
3079   /* Clear old values in C */
3080   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
3081 
3082   for (i=0; i<am; i++) {
3083     /* Form sparse row of A*P */
3084     anzi  = ai[i+1] - ai[i];
3085     apnzj = 0;
3086     for (j=0; j<anzi; j++) {
3087       /* Get offset within block of P */
3088       pshift = *aj%ppdof;
3089       /* Get block row of P */
3090       prow = *aj++/ppdof;   /* integer division */
3091       pnzj = pi[prow+1] - pi[prow];
3092       pjj  = pj + pi[prow];
3093       paj  = pa + pi[prow];
3094       for (k=0; k<pnzj; k++) {
3095         poffset = pjj[k]*ppdof+pshift;
3096         if (!apjdense[poffset]) {
3097           apjdense[poffset] = -1;
3098           apj[apnzj++]      = poffset;
3099         }
3100         apa[poffset] += (*aa)*paj[k];
3101       }
3102       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3103       aa++;
3104     }
3105 
3106     /* Sort the j index array for quick sparse axpy. */
3107     /* Note: a array does not need sorting as it is in dense storage locations. */
3108     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3109 
3110     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3111     prow    = i/ppdof; /* integer division */
3112     pshift  = i%ppdof;
3113     poffset = pi[prow];
3114     pnzi    = pi[prow+1] - poffset;
3115     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3116     pJ = pj+poffset;
3117     pA = pa+poffset;
3118     for (j=0; j<pnzi; j++) {
3119       crow = (*pJ)*ppdof+pshift;
3120       cjj  = cj + ci[crow];
3121       caj  = ca + ci[crow];
3122       pJ++;
3123       /* Perform sparse axpy operation.  Note cjj includes apj. */
3124       for (k=0,nextap=0; nextap<apnzj; k++) {
3125         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3126       }
3127       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3128       pA++;
3129     }
3130 
3131     /* Zero the current row info for A*P */
3132     for (j=0; j<apnzj; j++) {
3133       apa[apj[j]]      = 0.;
3134       apjdense[apj[j]] = 0;
3135     }
3136   }
3137 
3138   /* Assemble the final matrix and clean up */
3139   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3146 {
3147   PetscErrorCode      ierr;
3148   Mat_Product         *product = C->product;
3149   Mat                 A=product->A,P=product->B;
3150 
3151   PetscFunctionBegin;
3152   ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
3155 
3156 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3157 {
3158   PetscFunctionBegin;
3159   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3160 }
3161 
3162 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3163 {
3164   PetscFunctionBegin;
3165   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3166 }
3167 
3168 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3169 
3170 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3171 {
3172   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3173   PetscErrorCode  ierr;
3174 
3175   PetscFunctionBegin;
3176 
3177   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3182 
3183 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3184 {
3185   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3186   PetscErrorCode  ierr;
3187 
3188   PetscFunctionBegin;
3189   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3190   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3191   PetscFunctionReturn(0);
3192 }
3193 
3194 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3195 
3196 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3197 {
3198   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3199   PetscErrorCode  ierr;
3200 
3201   PetscFunctionBegin;
3202 
3203   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3208 
3209 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3210 {
3211   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3212   PetscErrorCode  ierr;
3213 
3214   PetscFunctionBegin;
3215 
3216   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3217   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3222 {
3223   PetscErrorCode      ierr;
3224   Mat_Product         *product = C->product;
3225   Mat                 A=product->A,P=product->B;
3226   PetscBool           flg;
3227 
3228   PetscFunctionBegin;
3229   ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr);
3230   if (flg) {
3231     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr);
3232     C->ops->productnumeric = MatProductNumeric_PtAP;
3233     PetscFunctionReturn(0);
3234   }
3235 
3236   ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
3237   if (flg) {
3238     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr);
3239     C->ops->productnumeric = MatProductNumeric_PtAP;
3240     PetscFunctionReturn(0);
3241   }
3242 
3243   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3244 }
3245 
3246 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3247 {
3248   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3249   Mat            a    = b->AIJ,B;
3250   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3251   PetscErrorCode ierr;
3252   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3253   PetscInt       *cols;
3254   PetscScalar    *vals;
3255 
3256   PetscFunctionBegin;
3257   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3258   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3259   for (i=0; i<m; i++) {
3260     nmax = PetscMax(nmax,aij->ilen[i]);
3261     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3262   }
3263   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3264   ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr);
3265   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3266   ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr);
3267   ierr = PetscFree(ilen);CHKERRQ(ierr);
3268   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3269   ii   = 0;
3270   for (i=0; i<m; i++) {
3271     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3272     for (j=0; j<dof; j++) {
3273       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3274       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3275       ii++;
3276     }
3277     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3278   }
3279   ierr = PetscFree(icols);CHKERRQ(ierr);
3280   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3281   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3282 
3283   if (reuse == MAT_INPLACE_MATRIX) {
3284     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3285   } else {
3286     *newmat = B;
3287   }
3288   PetscFunctionReturn(0);
3289 }
3290 
3291 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3292 
3293 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3294 {
3295   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3296   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3297   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3298   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3299   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3300   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3301   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3302   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3303   PetscInt       rstart,cstart,*garray,ii,k;
3304   PetscErrorCode ierr;
3305   PetscScalar    *vals,*ovals;
3306 
3307   PetscFunctionBegin;
3308   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3309   for (i=0; i<A->rmap->n/dof; i++) {
3310     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3311     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3312     for (j=0; j<dof; j++) {
3313       dnz[dof*i+j] = AIJ->ilen[i];
3314       onz[dof*i+j] = OAIJ->ilen[i];
3315     }
3316   }
3317   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3318   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3319   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3320   ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
3321   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3322   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3323 
3324   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3325   rstart = dof*maij->A->rmap->rstart;
3326   cstart = dof*maij->A->cmap->rstart;
3327   garray = mpiaij->garray;
3328 
3329   ii = rstart;
3330   for (i=0; i<A->rmap->n/dof; i++) {
3331     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3332     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3333     for (j=0; j<dof; j++) {
3334       for (k=0; k<ncols; k++) {
3335         icols[k] = cstart + dof*cols[k]+j;
3336       }
3337       for (k=0; k<oncols; k++) {
3338         oicols[k] = dof*garray[ocols[k]]+j;
3339       }
3340       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3341       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3342       ii++;
3343     }
3344     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3345     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3346   }
3347   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3348 
3349   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3350   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3351 
3352   if (reuse == MAT_INPLACE_MATRIX) {
3353     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3354     ((PetscObject)A)->refct = 1;
3355 
3356     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3357 
3358     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3359   } else {
3360     *newmat = B;
3361   }
3362   PetscFunctionReturn(0);
3363 }
3364 
3365 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3366 {
3367   PetscErrorCode ierr;
3368   Mat            A;
3369 
3370   PetscFunctionBegin;
3371   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3372   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3373   ierr = MatDestroy(&A);CHKERRQ(ierr);
3374   PetscFunctionReturn(0);
3375 }
3376 
3377 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3378 {
3379   PetscErrorCode ierr;
3380   Mat            A;
3381 
3382   PetscFunctionBegin;
3383   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3384   ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr);
3385   ierr = MatDestroy(&A);CHKERRQ(ierr);
3386   PetscFunctionReturn(0);
3387 }
3388 
3389 /* ---------------------------------------------------------------------------------- */
3390 /*@
3391   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3392   operations for multicomponent problems.  It interpolates each component the same
3393   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3394   and MATMPIAIJ for distributed matrices.
3395 
3396   Collective
3397 
3398   Input Parameters:
3399 + A - the AIJ matrix describing the action on blocks
3400 - dof - the block size (number of components per node)
3401 
3402   Output Parameter:
3403 . maij - the new MAIJ matrix
3404 
3405   Operations provided:
3406 .vb
3407     MatMult
3408     MatMultTranspose
3409     MatMultAdd
3410     MatMultTransposeAdd
3411     MatView
3412 .ve
3413 
3414   Level: advanced
3415 
3416 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3417 @*/
3418 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3419 {
3420   PetscErrorCode ierr;
3421   PetscMPIInt    size;
3422   PetscInt       n;
3423   Mat            B;
3424 #if defined(PETSC_HAVE_CUDA)
3425   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3426   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3427 #endif
3428 
3429   PetscFunctionBegin;
3430   dof  = PetscAbs(dof);
3431   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3432 
3433   if (dof == 1) *maij = A;
3434   else {
3435     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3436     /* propagate vec type */
3437     ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr);
3438     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3439     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3440     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3441     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3442     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3443 
3444     B->assembled = PETSC_TRUE;
3445 
3446     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3447     if (size == 1) {
3448       Mat_SeqMAIJ *b;
3449 
3450       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3451 
3452       B->ops->setup   = NULL;
3453       B->ops->destroy = MatDestroy_SeqMAIJ;
3454       B->ops->view    = MatView_SeqMAIJ;
3455 
3456       b               = (Mat_SeqMAIJ*)B->data;
3457       b->dof          = dof;
3458       b->AIJ          = A;
3459 
3460       if (dof == 2) {
3461         B->ops->mult             = MatMult_SeqMAIJ_2;
3462         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3463         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3464         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3465       } else if (dof == 3) {
3466         B->ops->mult             = MatMult_SeqMAIJ_3;
3467         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3468         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3469         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3470       } else if (dof == 4) {
3471         B->ops->mult             = MatMult_SeqMAIJ_4;
3472         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3473         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3474         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3475       } else if (dof == 5) {
3476         B->ops->mult             = MatMult_SeqMAIJ_5;
3477         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3478         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3479         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3480       } else if (dof == 6) {
3481         B->ops->mult             = MatMult_SeqMAIJ_6;
3482         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3483         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3484         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3485       } else if (dof == 7) {
3486         B->ops->mult             = MatMult_SeqMAIJ_7;
3487         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3488         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3489         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3490       } else if (dof == 8) {
3491         B->ops->mult             = MatMult_SeqMAIJ_8;
3492         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3493         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3494         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3495       } else if (dof == 9) {
3496         B->ops->mult             = MatMult_SeqMAIJ_9;
3497         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3498         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3499         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3500       } else if (dof == 10) {
3501         B->ops->mult             = MatMult_SeqMAIJ_10;
3502         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3503         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3504         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3505       } else if (dof == 11) {
3506         B->ops->mult             = MatMult_SeqMAIJ_11;
3507         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3508         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3509         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3510       } else if (dof == 16) {
3511         B->ops->mult             = MatMult_SeqMAIJ_16;
3512         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3513         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3514         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3515       } else if (dof == 18) {
3516         B->ops->mult             = MatMult_SeqMAIJ_18;
3517         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3518         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3519         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3520       } else {
3521         B->ops->mult             = MatMult_SeqMAIJ_N;
3522         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3523         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3524         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3525       }
3526 #if defined(PETSC_HAVE_CUDA)
3527       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3528 #endif
3529       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3530       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3531     } else {
3532       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3533       Mat_MPIMAIJ *b;
3534       IS          from,to;
3535       Vec         gvec;
3536 
3537       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3538 
3539       B->ops->setup            = NULL;
3540       B->ops->destroy          = MatDestroy_MPIMAIJ;
3541       B->ops->view             = MatView_MPIMAIJ;
3542 
3543       b      = (Mat_MPIMAIJ*)B->data;
3544       b->dof = dof;
3545       b->A   = A;
3546 
3547       ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr);
3548       ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr);
3549 
3550       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3551       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3552       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3553       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3554       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3555 
3556       /* create two temporary Index sets for build scatter gather */
3557       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3558       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3559 
3560       /* create temporary global vector to generate scatter context */
3561       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3562 
3563       /* generate the scatter context */
3564       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3565 
3566       ierr = ISDestroy(&from);CHKERRQ(ierr);
3567       ierr = ISDestroy(&to);CHKERRQ(ierr);
3568       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3569 
3570       B->ops->mult             = MatMult_MPIMAIJ_dof;
3571       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3572       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3573       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3574 
3575 #if defined(PETSC_HAVE_CUDA)
3576       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3577 #endif
3578       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3579       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3580     }
3581     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3582     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3583     ierr = MatSetUp(B);CHKERRQ(ierr);
3584 #if defined(PETSC_HAVE_CUDA)
3585     /* temporary until we have CUDA implementation of MAIJ */
3586     {
3587       PetscBool flg;
3588       if (convert) {
3589         ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
3590         if (flg) {
3591           ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
3592         }
3593       }
3594     }
3595 #endif
3596     *maij = B;
3597   }
3598   PetscFunctionReturn(0);
3599 }
3600