xref: /petsc/src/mat/impls/maij/maij.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: MatCreateMAIJ()
39 @*/
40 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41 {
42   PetscErrorCode ierr;
43   PetscBool      ismpimaij,isseqmaij;
44 
45   PetscFunctionBegin;
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48   if (ismpimaij) {
49     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50 
51     *B = b->A;
52   } else if (isseqmaij) {
53     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54 
55     *B = b->AIJ;
56   } else {
57     *B = A;
58   }
59   PetscFunctionReturn(0);
60 }
61 
62 /*@
63    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64 
65    Logically Collective
66 
67    Input Parameters:
68 +  A - the MAIJ matrix
69 -  dof - the block size for the new matrix
70 
71    Output Parameter:
72 .  B - the new MAIJ matrix
73 
74    Level: advanced
75 
76 .seealso: MatCreateMAIJ()
77 @*/
78 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79 {
80   PetscErrorCode ierr;
81   Mat            Aij = NULL;
82 
83   PetscFunctionBegin;
84   PetscValidLogicalCollectiveInt(A,dof,2);
85   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91 {
92   PetscErrorCode ierr;
93   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
94 
95   PetscFunctionBegin;
96   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97   ierr = PetscFree(A->data);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
99   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 PetscErrorCode MatSetUp_MAIJ(Mat A)
104 {
105   PetscFunctionBegin;
106   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107 }
108 
109 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110 {
111   PetscErrorCode ierr;
112   Mat            B;
113 
114   PetscFunctionBegin;
115   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
116   ierr = MatView(B,viewer);CHKERRQ(ierr);
117   ierr = MatDestroy(&B);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
122 {
123   PetscErrorCode ierr;
124   Mat            B;
125 
126   PetscFunctionBegin;
127   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
128   ierr = MatView(B,viewer);CHKERRQ(ierr);
129   ierr = MatDestroy(&B);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
134 {
135   PetscErrorCode ierr;
136   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
140   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
141   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
142   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
143   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
144   ierr = PetscFree(A->data);CHKERRQ(ierr);
145   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
147   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 /*MC
152   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
153   multicomponent problems, interpolating or restricting each component the same way independently.
154   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
155 
156   Operations provided:
157 + MatMult
158 . MatMultTranspose
159 . MatMultAdd
160 - MatMultTransposeAdd
161 
162   Level: advanced
163 
164 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
165 M*/
166 
167 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
168 {
169   PetscErrorCode ierr;
170   Mat_MPIMAIJ    *b;
171   PetscMPIInt    size;
172 
173   PetscFunctionBegin;
174   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
175   A->data  = (void*)b;
176 
177   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
178 
179   A->ops->setup = MatSetUp_MAIJ;
180 
181   b->AIJ  = NULL;
182   b->dof  = 0;
183   b->OAIJ = NULL;
184   b->ctx  = NULL;
185   b->w    = NULL;
186   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
187   if (size == 1) {
188     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
189   } else {
190     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
191   }
192   A->preallocated  = PETSC_TRUE;
193   A->assembled     = PETSC_TRUE;
194   PetscFunctionReturn(0);
195 }
196 
197 /* --------------------------------------------------------------------------------------*/
198 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
199 {
200   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
201   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
202   const PetscScalar *x,*v;
203   PetscScalar       *y, sum1, sum2;
204   PetscErrorCode    ierr;
205   PetscInt          nonzerorow=0,n,i,jrow,j;
206   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
207 
208   PetscFunctionBegin;
209   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
210   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
211   idx  = a->j;
212   v    = a->a;
213   ii   = a->i;
214 
215   for (i=0; i<m; i++) {
216     jrow  = ii[i];
217     n     = ii[i+1] - jrow;
218     sum1  = 0.0;
219     sum2  = 0.0;
220 
221     nonzerorow += (n>0);
222     for (j=0; j<n; j++) {
223       sum1 += v[jrow]*x[2*idx[jrow]];
224       sum2 += v[jrow]*x[2*idx[jrow]+1];
225       jrow++;
226     }
227     y[2*i]   = sum1;
228     y[2*i+1] = sum2;
229   }
230 
231   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
232   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
233   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
238 {
239   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
240   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
241   const PetscScalar *x,*v;
242   PetscScalar       *y,alpha1,alpha2;
243   PetscErrorCode    ierr;
244   PetscInt          n,i;
245   const PetscInt    m = b->AIJ->rmap->n,*idx;
246 
247   PetscFunctionBegin;
248   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
249   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
250   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
251 
252   for (i=0; i<m; i++) {
253     idx    = a->j + a->i[i];
254     v      = a->a + a->i[i];
255     n      = a->i[i+1] - a->i[i];
256     alpha1 = x[2*i];
257     alpha2 = x[2*i+1];
258     while (n-->0) {
259       y[2*(*idx)]   += alpha1*(*v);
260       y[2*(*idx)+1] += alpha2*(*v);
261       idx++; v++;
262     }
263   }
264   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
265   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
266   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
271 {
272   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
273   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
274   const PetscScalar *x,*v;
275   PetscScalar       *y,sum1, sum2;
276   PetscErrorCode    ierr;
277   PetscInt          n,i,jrow,j;
278   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
279 
280   PetscFunctionBegin;
281   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
282   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
283   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
284   idx  = a->j;
285   v    = a->a;
286   ii   = a->i;
287 
288   for (i=0; i<m; i++) {
289     jrow = ii[i];
290     n    = ii[i+1] - jrow;
291     sum1 = 0.0;
292     sum2 = 0.0;
293     for (j=0; j<n; j++) {
294       sum1 += v[jrow]*x[2*idx[jrow]];
295       sum2 += v[jrow]*x[2*idx[jrow]+1];
296       jrow++;
297     }
298     y[2*i]   += sum1;
299     y[2*i+1] += sum2;
300   }
301 
302   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
303   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
308 {
309   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
310   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
311   const PetscScalar *x,*v;
312   PetscScalar       *y,alpha1,alpha2;
313   PetscErrorCode    ierr;
314   PetscInt          n,i;
315   const PetscInt    m = b->AIJ->rmap->n,*idx;
316 
317   PetscFunctionBegin;
318   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
319   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
320   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
321 
322   for (i=0; i<m; i++) {
323     idx    = a->j + a->i[i];
324     v      = a->a + a->i[i];
325     n      = a->i[i+1] - a->i[i];
326     alpha1 = x[2*i];
327     alpha2 = x[2*i+1];
328     while (n-->0) {
329       y[2*(*idx)]   += alpha1*(*v);
330       y[2*(*idx)+1] += alpha2*(*v);
331       idx++; v++;
332     }
333   }
334   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
335   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
336   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 /* --------------------------------------------------------------------------------------*/
340 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
341 {
342   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
343   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
344   const PetscScalar *x,*v;
345   PetscScalar       *y,sum1, sum2, sum3;
346   PetscErrorCode    ierr;
347   PetscInt          nonzerorow=0,n,i,jrow,j;
348   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
349 
350   PetscFunctionBegin;
351   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
353   idx  = a->j;
354   v    = a->a;
355   ii   = a->i;
356 
357   for (i=0; i<m; i++) {
358     jrow  = ii[i];
359     n     = ii[i+1] - jrow;
360     sum1  = 0.0;
361     sum2  = 0.0;
362     sum3  = 0.0;
363 
364     nonzerorow += (n>0);
365     for (j=0; j<n; j++) {
366       sum1 += v[jrow]*x[3*idx[jrow]];
367       sum2 += v[jrow]*x[3*idx[jrow]+1];
368       sum3 += v[jrow]*x[3*idx[jrow]+2];
369       jrow++;
370      }
371     y[3*i]   = sum1;
372     y[3*i+1] = sum2;
373     y[3*i+2] = sum3;
374   }
375 
376   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
377   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
378   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 
382 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
383 {
384   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
385   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
386   const PetscScalar *x,*v;
387   PetscScalar       *y,alpha1,alpha2,alpha3;
388   PetscErrorCode    ierr;
389   PetscInt          n,i;
390   const PetscInt    m = b->AIJ->rmap->n,*idx;
391 
392   PetscFunctionBegin;
393   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
394   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
395   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
396 
397   for (i=0; i<m; i++) {
398     idx    = a->j + a->i[i];
399     v      = a->a + a->i[i];
400     n      = a->i[i+1] - a->i[i];
401     alpha1 = x[3*i];
402     alpha2 = x[3*i+1];
403     alpha3 = x[3*i+2];
404     while (n-->0) {
405       y[3*(*idx)]   += alpha1*(*v);
406       y[3*(*idx)+1] += alpha2*(*v);
407       y[3*(*idx)+2] += alpha3*(*v);
408       idx++; v++;
409     }
410   }
411   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
412   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
413   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
418 {
419   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
420   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
421   const PetscScalar *x,*v;
422   PetscScalar       *y,sum1, sum2, sum3;
423   PetscErrorCode    ierr;
424   PetscInt          n,i,jrow,j;
425   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
426 
427   PetscFunctionBegin;
428   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
429   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
430   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
431   idx  = a->j;
432   v    = a->a;
433   ii   = a->i;
434 
435   for (i=0; i<m; i++) {
436     jrow = ii[i];
437     n    = ii[i+1] - jrow;
438     sum1 = 0.0;
439     sum2 = 0.0;
440     sum3 = 0.0;
441     for (j=0; j<n; j++) {
442       sum1 += v[jrow]*x[3*idx[jrow]];
443       sum2 += v[jrow]*x[3*idx[jrow]+1];
444       sum3 += v[jrow]*x[3*idx[jrow]+2];
445       jrow++;
446     }
447     y[3*i]   += sum1;
448     y[3*i+1] += sum2;
449     y[3*i+2] += sum3;
450   }
451 
452   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
453   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
454   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
458 {
459   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
460   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
461   const PetscScalar *x,*v;
462   PetscScalar       *y,alpha1,alpha2,alpha3;
463   PetscErrorCode    ierr;
464   PetscInt          n,i;
465   const PetscInt    m = b->AIJ->rmap->n,*idx;
466 
467   PetscFunctionBegin;
468   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
469   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
470   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
471   for (i=0; i<m; i++) {
472     idx    = a->j + a->i[i];
473     v      = a->a + a->i[i];
474     n      = a->i[i+1] - a->i[i];
475     alpha1 = x[3*i];
476     alpha2 = x[3*i+1];
477     alpha3 = x[3*i+2];
478     while (n-->0) {
479       y[3*(*idx)]   += alpha1*(*v);
480       y[3*(*idx)+1] += alpha2*(*v);
481       y[3*(*idx)+2] += alpha3*(*v);
482       idx++; v++;
483     }
484   }
485   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
486   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
487   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 /* ------------------------------------------------------------------------------*/
492 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
493 {
494   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
495   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
496   const PetscScalar *x,*v;
497   PetscScalar       *y,sum1, sum2, sum3, sum4;
498   PetscErrorCode    ierr;
499   PetscInt          nonzerorow=0,n,i,jrow,j;
500   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
501 
502   PetscFunctionBegin;
503   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
504   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
505   idx  = a->j;
506   v    = a->a;
507   ii   = a->i;
508 
509   for (i=0; i<m; i++) {
510     jrow        = ii[i];
511     n           = ii[i+1] - jrow;
512     sum1        = 0.0;
513     sum2        = 0.0;
514     sum3        = 0.0;
515     sum4        = 0.0;
516     nonzerorow += (n>0);
517     for (j=0; j<n; j++) {
518       sum1 += v[jrow]*x[4*idx[jrow]];
519       sum2 += v[jrow]*x[4*idx[jrow]+1];
520       sum3 += v[jrow]*x[4*idx[jrow]+2];
521       sum4 += v[jrow]*x[4*idx[jrow]+3];
522       jrow++;
523     }
524     y[4*i]   = sum1;
525     y[4*i+1] = sum2;
526     y[4*i+2] = sum3;
527     y[4*i+3] = sum4;
528   }
529 
530   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
531   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
532   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
537 {
538   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
539   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
540   const PetscScalar *x,*v;
541   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
542   PetscErrorCode    ierr;
543   PetscInt          n,i;
544   const PetscInt    m = b->AIJ->rmap->n,*idx;
545 
546   PetscFunctionBegin;
547   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
548   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
549   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
550   for (i=0; i<m; i++) {
551     idx    = a->j + a->i[i];
552     v      = a->a + a->i[i];
553     n      = a->i[i+1] - a->i[i];
554     alpha1 = x[4*i];
555     alpha2 = x[4*i+1];
556     alpha3 = x[4*i+2];
557     alpha4 = x[4*i+3];
558     while (n-->0) {
559       y[4*(*idx)]   += alpha1*(*v);
560       y[4*(*idx)+1] += alpha2*(*v);
561       y[4*(*idx)+2] += alpha3*(*v);
562       y[4*(*idx)+3] += alpha4*(*v);
563       idx++; v++;
564     }
565   }
566   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
567   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573 {
574   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
575   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
576   const PetscScalar *x,*v;
577   PetscScalar       *y,sum1, sum2, sum3, sum4;
578   PetscErrorCode    ierr;
579   PetscInt          n,i,jrow,j;
580   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
581 
582   PetscFunctionBegin;
583   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
584   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
585   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
586   idx  = a->j;
587   v    = a->a;
588   ii   = a->i;
589 
590   for (i=0; i<m; i++) {
591     jrow = ii[i];
592     n    = ii[i+1] - jrow;
593     sum1 = 0.0;
594     sum2 = 0.0;
595     sum3 = 0.0;
596     sum4 = 0.0;
597     for (j=0; j<n; j++) {
598       sum1 += v[jrow]*x[4*idx[jrow]];
599       sum2 += v[jrow]*x[4*idx[jrow]+1];
600       sum3 += v[jrow]*x[4*idx[jrow]+2];
601       sum4 += v[jrow]*x[4*idx[jrow]+3];
602       jrow++;
603     }
604     y[4*i]   += sum1;
605     y[4*i+1] += sum2;
606     y[4*i+2] += sum3;
607     y[4*i+3] += sum4;
608   }
609 
610   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
611   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
612   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
613   PetscFunctionReturn(0);
614 }
615 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
616 {
617   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
618   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
619   const PetscScalar *x,*v;
620   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
621   PetscErrorCode    ierr;
622   PetscInt          n,i;
623   const PetscInt    m = b->AIJ->rmap->n,*idx;
624 
625   PetscFunctionBegin;
626   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
627   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
628   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
629 
630   for (i=0; i<m; i++) {
631     idx    = a->j + a->i[i];
632     v      = a->a + a->i[i];
633     n      = a->i[i+1] - a->i[i];
634     alpha1 = x[4*i];
635     alpha2 = x[4*i+1];
636     alpha3 = x[4*i+2];
637     alpha4 = x[4*i+3];
638     while (n-->0) {
639       y[4*(*idx)]   += alpha1*(*v);
640       y[4*(*idx)+1] += alpha2*(*v);
641       y[4*(*idx)+2] += alpha3*(*v);
642       y[4*(*idx)+3] += alpha4*(*v);
643       idx++; v++;
644     }
645   }
646   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
647   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
648   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 /* ------------------------------------------------------------------------------*/
652 
653 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
654 {
655   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
656   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
657   const PetscScalar *x,*v;
658   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
659   PetscErrorCode    ierr;
660   PetscInt          nonzerorow=0,n,i,jrow,j;
661   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
662 
663   PetscFunctionBegin;
664   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
665   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
666   idx  = a->j;
667   v    = a->a;
668   ii   = a->i;
669 
670   for (i=0; i<m; i++) {
671     jrow  = ii[i];
672     n     = ii[i+1] - jrow;
673     sum1  = 0.0;
674     sum2  = 0.0;
675     sum3  = 0.0;
676     sum4  = 0.0;
677     sum5  = 0.0;
678 
679     nonzerorow += (n>0);
680     for (j=0; j<n; j++) {
681       sum1 += v[jrow]*x[5*idx[jrow]];
682       sum2 += v[jrow]*x[5*idx[jrow]+1];
683       sum3 += v[jrow]*x[5*idx[jrow]+2];
684       sum4 += v[jrow]*x[5*idx[jrow]+3];
685       sum5 += v[jrow]*x[5*idx[jrow]+4];
686       jrow++;
687     }
688     y[5*i]   = sum1;
689     y[5*i+1] = sum2;
690     y[5*i+2] = sum3;
691     y[5*i+3] = sum4;
692     y[5*i+4] = sum5;
693   }
694 
695   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
696   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
697   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
702 {
703   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
704   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
705   const PetscScalar *x,*v;
706   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
707   PetscErrorCode    ierr;
708   PetscInt          n,i;
709   const PetscInt    m = b->AIJ->rmap->n,*idx;
710 
711   PetscFunctionBegin;
712   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
713   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
714   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
715 
716   for (i=0; i<m; i++) {
717     idx    = a->j + a->i[i];
718     v      = a->a + a->i[i];
719     n      = a->i[i+1] - a->i[i];
720     alpha1 = x[5*i];
721     alpha2 = x[5*i+1];
722     alpha3 = x[5*i+2];
723     alpha4 = x[5*i+3];
724     alpha5 = x[5*i+4];
725     while (n-->0) {
726       y[5*(*idx)]   += alpha1*(*v);
727       y[5*(*idx)+1] += alpha2*(*v);
728       y[5*(*idx)+2] += alpha3*(*v);
729       y[5*(*idx)+3] += alpha4*(*v);
730       y[5*(*idx)+4] += alpha5*(*v);
731       idx++; v++;
732     }
733   }
734   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
735   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
736   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
741 {
742   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
743   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
744   const PetscScalar *x,*v;
745   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
746   PetscErrorCode    ierr;
747   PetscInt          n,i,jrow,j;
748   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
749 
750   PetscFunctionBegin;
751   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
752   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
753   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
754   idx  = a->j;
755   v    = a->a;
756   ii   = a->i;
757 
758   for (i=0; i<m; i++) {
759     jrow = ii[i];
760     n    = ii[i+1] - jrow;
761     sum1 = 0.0;
762     sum2 = 0.0;
763     sum3 = 0.0;
764     sum4 = 0.0;
765     sum5 = 0.0;
766     for (j=0; j<n; j++) {
767       sum1 += v[jrow]*x[5*idx[jrow]];
768       sum2 += v[jrow]*x[5*idx[jrow]+1];
769       sum3 += v[jrow]*x[5*idx[jrow]+2];
770       sum4 += v[jrow]*x[5*idx[jrow]+3];
771       sum5 += v[jrow]*x[5*idx[jrow]+4];
772       jrow++;
773     }
774     y[5*i]   += sum1;
775     y[5*i+1] += sum2;
776     y[5*i+2] += sum3;
777     y[5*i+3] += sum4;
778     y[5*i+4] += sum5;
779   }
780 
781   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
782   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
783   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
784   PetscFunctionReturn(0);
785 }
786 
787 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788 {
789   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
790   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
791   const PetscScalar *x,*v;
792   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
793   PetscErrorCode    ierr;
794   PetscInt          n,i;
795   const PetscInt    m = b->AIJ->rmap->n,*idx;
796 
797   PetscFunctionBegin;
798   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
799   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
800   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
801 
802   for (i=0; i<m; i++) {
803     idx    = a->j + a->i[i];
804     v      = a->a + a->i[i];
805     n      = a->i[i+1] - a->i[i];
806     alpha1 = x[5*i];
807     alpha2 = x[5*i+1];
808     alpha3 = x[5*i+2];
809     alpha4 = x[5*i+3];
810     alpha5 = x[5*i+4];
811     while (n-->0) {
812       y[5*(*idx)]   += alpha1*(*v);
813       y[5*(*idx)+1] += alpha2*(*v);
814       y[5*(*idx)+2] += alpha3*(*v);
815       y[5*(*idx)+3] += alpha4*(*v);
816       y[5*(*idx)+4] += alpha5*(*v);
817       idx++; v++;
818     }
819   }
820   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
821   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
822   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 
826 /* ------------------------------------------------------------------------------*/
827 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
828 {
829   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
830   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
831   const PetscScalar *x,*v;
832   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
833   PetscErrorCode    ierr;
834   PetscInt          nonzerorow=0,n,i,jrow,j;
835   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
836 
837   PetscFunctionBegin;
838   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
839   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
840   idx  = a->j;
841   v    = a->a;
842   ii   = a->i;
843 
844   for (i=0; i<m; i++) {
845     jrow  = ii[i];
846     n     = ii[i+1] - jrow;
847     sum1  = 0.0;
848     sum2  = 0.0;
849     sum3  = 0.0;
850     sum4  = 0.0;
851     sum5  = 0.0;
852     sum6  = 0.0;
853 
854     nonzerorow += (n>0);
855     for (j=0; j<n; j++) {
856       sum1 += v[jrow]*x[6*idx[jrow]];
857       sum2 += v[jrow]*x[6*idx[jrow]+1];
858       sum3 += v[jrow]*x[6*idx[jrow]+2];
859       sum4 += v[jrow]*x[6*idx[jrow]+3];
860       sum5 += v[jrow]*x[6*idx[jrow]+4];
861       sum6 += v[jrow]*x[6*idx[jrow]+5];
862       jrow++;
863     }
864     y[6*i]   = sum1;
865     y[6*i+1] = sum2;
866     y[6*i+2] = sum3;
867     y[6*i+3] = sum4;
868     y[6*i+4] = sum5;
869     y[6*i+5] = sum6;
870   }
871 
872   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
873   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
874   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879 {
880   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
881   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
882   const PetscScalar *x,*v;
883   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
884   PetscErrorCode    ierr;
885   PetscInt          n,i;
886   const PetscInt    m = b->AIJ->rmap->n,*idx;
887 
888   PetscFunctionBegin;
889   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
890   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
891   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
892 
893   for (i=0; i<m; i++) {
894     idx    = a->j + a->i[i];
895     v      = a->a + a->i[i];
896     n      = a->i[i+1] - a->i[i];
897     alpha1 = x[6*i];
898     alpha2 = x[6*i+1];
899     alpha3 = x[6*i+2];
900     alpha4 = x[6*i+3];
901     alpha5 = x[6*i+4];
902     alpha6 = x[6*i+5];
903     while (n-->0) {
904       y[6*(*idx)]   += alpha1*(*v);
905       y[6*(*idx)+1] += alpha2*(*v);
906       y[6*(*idx)+2] += alpha3*(*v);
907       y[6*(*idx)+3] += alpha4*(*v);
908       y[6*(*idx)+4] += alpha5*(*v);
909       y[6*(*idx)+5] += alpha6*(*v);
910       idx++; v++;
911     }
912   }
913   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
914   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
915   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
920 {
921   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
922   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
923   const PetscScalar *x,*v;
924   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
925   PetscErrorCode    ierr;
926   PetscInt          n,i,jrow,j;
927   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
928 
929   PetscFunctionBegin;
930   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
931   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
932   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
933   idx  = a->j;
934   v    = a->a;
935   ii   = a->i;
936 
937   for (i=0; i<m; i++) {
938     jrow = ii[i];
939     n    = ii[i+1] - jrow;
940     sum1 = 0.0;
941     sum2 = 0.0;
942     sum3 = 0.0;
943     sum4 = 0.0;
944     sum5 = 0.0;
945     sum6 = 0.0;
946     for (j=0; j<n; j++) {
947       sum1 += v[jrow]*x[6*idx[jrow]];
948       sum2 += v[jrow]*x[6*idx[jrow]+1];
949       sum3 += v[jrow]*x[6*idx[jrow]+2];
950       sum4 += v[jrow]*x[6*idx[jrow]+3];
951       sum5 += v[jrow]*x[6*idx[jrow]+4];
952       sum6 += v[jrow]*x[6*idx[jrow]+5];
953       jrow++;
954     }
955     y[6*i]   += sum1;
956     y[6*i+1] += sum2;
957     y[6*i+2] += sum3;
958     y[6*i+3] += sum4;
959     y[6*i+4] += sum5;
960     y[6*i+5] += sum6;
961   }
962 
963   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
964   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
965   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
970 {
971   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
972   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
973   const PetscScalar *x,*v;
974   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
975   PetscErrorCode    ierr;
976   PetscInt          n,i;
977   const PetscInt    m = b->AIJ->rmap->n,*idx;
978 
979   PetscFunctionBegin;
980   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
981   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
982   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
983 
984   for (i=0; i<m; i++) {
985     idx    = a->j + a->i[i];
986     v      = a->a + a->i[i];
987     n      = a->i[i+1] - a->i[i];
988     alpha1 = x[6*i];
989     alpha2 = x[6*i+1];
990     alpha3 = x[6*i+2];
991     alpha4 = x[6*i+3];
992     alpha5 = x[6*i+4];
993     alpha6 = x[6*i+5];
994     while (n-->0) {
995       y[6*(*idx)]   += alpha1*(*v);
996       y[6*(*idx)+1] += alpha2*(*v);
997       y[6*(*idx)+2] += alpha3*(*v);
998       y[6*(*idx)+3] += alpha4*(*v);
999       y[6*(*idx)+4] += alpha5*(*v);
1000       y[6*(*idx)+5] += alpha6*(*v);
1001       idx++; v++;
1002     }
1003   }
1004   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1005   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1006   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 /* ------------------------------------------------------------------------------*/
1011 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1014   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015   const PetscScalar *x,*v;
1016   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1017   PetscErrorCode    ierr;
1018   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1019   PetscInt          nonzerorow=0,n,i,jrow,j;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1023   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1024   idx  = a->j;
1025   v    = a->a;
1026   ii   = a->i;
1027 
1028   for (i=0; i<m; i++) {
1029     jrow  = ii[i];
1030     n     = ii[i+1] - jrow;
1031     sum1  = 0.0;
1032     sum2  = 0.0;
1033     sum3  = 0.0;
1034     sum4  = 0.0;
1035     sum5  = 0.0;
1036     sum6  = 0.0;
1037     sum7  = 0.0;
1038 
1039     nonzerorow += (n>0);
1040     for (j=0; j<n; j++) {
1041       sum1 += v[jrow]*x[7*idx[jrow]];
1042       sum2 += v[jrow]*x[7*idx[jrow]+1];
1043       sum3 += v[jrow]*x[7*idx[jrow]+2];
1044       sum4 += v[jrow]*x[7*idx[jrow]+3];
1045       sum5 += v[jrow]*x[7*idx[jrow]+4];
1046       sum6 += v[jrow]*x[7*idx[jrow]+5];
1047       sum7 += v[jrow]*x[7*idx[jrow]+6];
1048       jrow++;
1049     }
1050     y[7*i]   = sum1;
1051     y[7*i+1] = sum2;
1052     y[7*i+2] = sum3;
1053     y[7*i+3] = sum4;
1054     y[7*i+4] = sum5;
1055     y[7*i+5] = sum6;
1056     y[7*i+6] = sum7;
1057   }
1058 
1059   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1060   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1061   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1066 {
1067   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1068   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1069   const PetscScalar *x,*v;
1070   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1071   PetscErrorCode    ierr;
1072   const PetscInt    m = b->AIJ->rmap->n,*idx;
1073   PetscInt          n,i;
1074 
1075   PetscFunctionBegin;
1076   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1077   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1078   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1079 
1080   for (i=0; i<m; i++) {
1081     idx    = a->j + a->i[i];
1082     v      = a->a + a->i[i];
1083     n      = a->i[i+1] - a->i[i];
1084     alpha1 = x[7*i];
1085     alpha2 = x[7*i+1];
1086     alpha3 = x[7*i+2];
1087     alpha4 = x[7*i+3];
1088     alpha5 = x[7*i+4];
1089     alpha6 = x[7*i+5];
1090     alpha7 = x[7*i+6];
1091     while (n-->0) {
1092       y[7*(*idx)]   += alpha1*(*v);
1093       y[7*(*idx)+1] += alpha2*(*v);
1094       y[7*(*idx)+2] += alpha3*(*v);
1095       y[7*(*idx)+3] += alpha4*(*v);
1096       y[7*(*idx)+4] += alpha5*(*v);
1097       y[7*(*idx)+5] += alpha6*(*v);
1098       y[7*(*idx)+6] += alpha7*(*v);
1099       idx++; v++;
1100     }
1101   }
1102   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1103   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1104   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1111   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1112   const PetscScalar *x,*v;
1113   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1114   PetscErrorCode    ierr;
1115   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1116   PetscInt          n,i,jrow,j;
1117 
1118   PetscFunctionBegin;
1119   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1120   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1121   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1122   idx  = a->j;
1123   v    = a->a;
1124   ii   = a->i;
1125 
1126   for (i=0; i<m; i++) {
1127     jrow = ii[i];
1128     n    = ii[i+1] - jrow;
1129     sum1 = 0.0;
1130     sum2 = 0.0;
1131     sum3 = 0.0;
1132     sum4 = 0.0;
1133     sum5 = 0.0;
1134     sum6 = 0.0;
1135     sum7 = 0.0;
1136     for (j=0; j<n; j++) {
1137       sum1 += v[jrow]*x[7*idx[jrow]];
1138       sum2 += v[jrow]*x[7*idx[jrow]+1];
1139       sum3 += v[jrow]*x[7*idx[jrow]+2];
1140       sum4 += v[jrow]*x[7*idx[jrow]+3];
1141       sum5 += v[jrow]*x[7*idx[jrow]+4];
1142       sum6 += v[jrow]*x[7*idx[jrow]+5];
1143       sum7 += v[jrow]*x[7*idx[jrow]+6];
1144       jrow++;
1145     }
1146     y[7*i]   += sum1;
1147     y[7*i+1] += sum2;
1148     y[7*i+2] += sum3;
1149     y[7*i+3] += sum4;
1150     y[7*i+4] += sum5;
1151     y[7*i+5] += sum6;
1152     y[7*i+6] += sum7;
1153   }
1154 
1155   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1156   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1157   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1162 {
1163   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1164   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1165   const PetscScalar *x,*v;
1166   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1167   PetscErrorCode    ierr;
1168   const PetscInt    m = b->AIJ->rmap->n,*idx;
1169   PetscInt          n,i;
1170 
1171   PetscFunctionBegin;
1172   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1173   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1174   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1175   for (i=0; i<m; i++) {
1176     idx    = a->j + a->i[i];
1177     v      = a->a + a->i[i];
1178     n      = a->i[i+1] - a->i[i];
1179     alpha1 = x[7*i];
1180     alpha2 = x[7*i+1];
1181     alpha3 = x[7*i+2];
1182     alpha4 = x[7*i+3];
1183     alpha5 = x[7*i+4];
1184     alpha6 = x[7*i+5];
1185     alpha7 = x[7*i+6];
1186     while (n-->0) {
1187       y[7*(*idx)]   += alpha1*(*v);
1188       y[7*(*idx)+1] += alpha2*(*v);
1189       y[7*(*idx)+2] += alpha3*(*v);
1190       y[7*(*idx)+3] += alpha4*(*v);
1191       y[7*(*idx)+4] += alpha5*(*v);
1192       y[7*(*idx)+5] += alpha6*(*v);
1193       y[7*(*idx)+6] += alpha7*(*v);
1194       idx++; v++;
1195     }
1196   }
1197   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1198   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1204 {
1205   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1206   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1207   const PetscScalar *x,*v;
1208   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1209   PetscErrorCode    ierr;
1210   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1211   PetscInt          nonzerorow=0,n,i,jrow,j;
1212 
1213   PetscFunctionBegin;
1214   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1215   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1216   idx  = a->j;
1217   v    = a->a;
1218   ii   = a->i;
1219 
1220   for (i=0; i<m; i++) {
1221     jrow  = ii[i];
1222     n     = ii[i+1] - jrow;
1223     sum1  = 0.0;
1224     sum2  = 0.0;
1225     sum3  = 0.0;
1226     sum4  = 0.0;
1227     sum5  = 0.0;
1228     sum6  = 0.0;
1229     sum7  = 0.0;
1230     sum8  = 0.0;
1231 
1232     nonzerorow += (n>0);
1233     for (j=0; j<n; j++) {
1234       sum1 += v[jrow]*x[8*idx[jrow]];
1235       sum2 += v[jrow]*x[8*idx[jrow]+1];
1236       sum3 += v[jrow]*x[8*idx[jrow]+2];
1237       sum4 += v[jrow]*x[8*idx[jrow]+3];
1238       sum5 += v[jrow]*x[8*idx[jrow]+4];
1239       sum6 += v[jrow]*x[8*idx[jrow]+5];
1240       sum7 += v[jrow]*x[8*idx[jrow]+6];
1241       sum8 += v[jrow]*x[8*idx[jrow]+7];
1242       jrow++;
1243     }
1244     y[8*i]   = sum1;
1245     y[8*i+1] = sum2;
1246     y[8*i+2] = sum3;
1247     y[8*i+3] = sum4;
1248     y[8*i+4] = sum5;
1249     y[8*i+5] = sum6;
1250     y[8*i+6] = sum7;
1251     y[8*i+7] = sum8;
1252   }
1253 
1254   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1255   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1256   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1261 {
1262   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1263   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1264   const PetscScalar *x,*v;
1265   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1266   PetscErrorCode    ierr;
1267   const PetscInt    m = b->AIJ->rmap->n,*idx;
1268   PetscInt          n,i;
1269 
1270   PetscFunctionBegin;
1271   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1272   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1273   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1274 
1275   for (i=0; i<m; i++) {
1276     idx    = a->j + a->i[i];
1277     v      = a->a + a->i[i];
1278     n      = a->i[i+1] - a->i[i];
1279     alpha1 = x[8*i];
1280     alpha2 = x[8*i+1];
1281     alpha3 = x[8*i+2];
1282     alpha4 = x[8*i+3];
1283     alpha5 = x[8*i+4];
1284     alpha6 = x[8*i+5];
1285     alpha7 = x[8*i+6];
1286     alpha8 = x[8*i+7];
1287     while (n-->0) {
1288       y[8*(*idx)]   += alpha1*(*v);
1289       y[8*(*idx)+1] += alpha2*(*v);
1290       y[8*(*idx)+2] += alpha3*(*v);
1291       y[8*(*idx)+3] += alpha4*(*v);
1292       y[8*(*idx)+4] += alpha5*(*v);
1293       y[8*(*idx)+5] += alpha6*(*v);
1294       y[8*(*idx)+6] += alpha7*(*v);
1295       y[8*(*idx)+7] += alpha8*(*v);
1296       idx++; v++;
1297     }
1298   }
1299   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1300   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1301   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1306 {
1307   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1308   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1309   const PetscScalar *x,*v;
1310   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1311   PetscErrorCode    ierr;
1312   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1313   PetscInt          n,i,jrow,j;
1314 
1315   PetscFunctionBegin;
1316   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1317   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1318   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1319   idx  = a->j;
1320   v    = a->a;
1321   ii   = a->i;
1322 
1323   for (i=0; i<m; i++) {
1324     jrow = ii[i];
1325     n    = ii[i+1] - jrow;
1326     sum1 = 0.0;
1327     sum2 = 0.0;
1328     sum3 = 0.0;
1329     sum4 = 0.0;
1330     sum5 = 0.0;
1331     sum6 = 0.0;
1332     sum7 = 0.0;
1333     sum8 = 0.0;
1334     for (j=0; j<n; j++) {
1335       sum1 += v[jrow]*x[8*idx[jrow]];
1336       sum2 += v[jrow]*x[8*idx[jrow]+1];
1337       sum3 += v[jrow]*x[8*idx[jrow]+2];
1338       sum4 += v[jrow]*x[8*idx[jrow]+3];
1339       sum5 += v[jrow]*x[8*idx[jrow]+4];
1340       sum6 += v[jrow]*x[8*idx[jrow]+5];
1341       sum7 += v[jrow]*x[8*idx[jrow]+6];
1342       sum8 += v[jrow]*x[8*idx[jrow]+7];
1343       jrow++;
1344     }
1345     y[8*i]   += sum1;
1346     y[8*i+1] += sum2;
1347     y[8*i+2] += sum3;
1348     y[8*i+3] += sum4;
1349     y[8*i+4] += sum5;
1350     y[8*i+5] += sum6;
1351     y[8*i+6] += sum7;
1352     y[8*i+7] += sum8;
1353   }
1354 
1355   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1356   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1357   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1362 {
1363   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1364   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1365   const PetscScalar *x,*v;
1366   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1367   PetscErrorCode    ierr;
1368   const PetscInt    m = b->AIJ->rmap->n,*idx;
1369   PetscInt          n,i;
1370 
1371   PetscFunctionBegin;
1372   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1373   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1374   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1375   for (i=0; i<m; i++) {
1376     idx    = a->j + a->i[i];
1377     v      = a->a + a->i[i];
1378     n      = a->i[i+1] - a->i[i];
1379     alpha1 = x[8*i];
1380     alpha2 = x[8*i+1];
1381     alpha3 = x[8*i+2];
1382     alpha4 = x[8*i+3];
1383     alpha5 = x[8*i+4];
1384     alpha6 = x[8*i+5];
1385     alpha7 = x[8*i+6];
1386     alpha8 = x[8*i+7];
1387     while (n-->0) {
1388       y[8*(*idx)]   += alpha1*(*v);
1389       y[8*(*idx)+1] += alpha2*(*v);
1390       y[8*(*idx)+2] += alpha3*(*v);
1391       y[8*(*idx)+3] += alpha4*(*v);
1392       y[8*(*idx)+4] += alpha5*(*v);
1393       y[8*(*idx)+5] += alpha6*(*v);
1394       y[8*(*idx)+6] += alpha7*(*v);
1395       y[8*(*idx)+7] += alpha8*(*v);
1396       idx++; v++;
1397     }
1398   }
1399   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1400   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1401   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 /* ------------------------------------------------------------------------------*/
1406 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1407 {
1408   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1409   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1410   const PetscScalar *x,*v;
1411   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1412   PetscErrorCode    ierr;
1413   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1414   PetscInt          nonzerorow=0,n,i,jrow,j;
1415 
1416   PetscFunctionBegin;
1417   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1418   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1419   idx  = a->j;
1420   v    = a->a;
1421   ii   = a->i;
1422 
1423   for (i=0; i<m; i++) {
1424     jrow  = ii[i];
1425     n     = ii[i+1] - jrow;
1426     sum1  = 0.0;
1427     sum2  = 0.0;
1428     sum3  = 0.0;
1429     sum4  = 0.0;
1430     sum5  = 0.0;
1431     sum6  = 0.0;
1432     sum7  = 0.0;
1433     sum8  = 0.0;
1434     sum9  = 0.0;
1435 
1436     nonzerorow += (n>0);
1437     for (j=0; j<n; j++) {
1438       sum1 += v[jrow]*x[9*idx[jrow]];
1439       sum2 += v[jrow]*x[9*idx[jrow]+1];
1440       sum3 += v[jrow]*x[9*idx[jrow]+2];
1441       sum4 += v[jrow]*x[9*idx[jrow]+3];
1442       sum5 += v[jrow]*x[9*idx[jrow]+4];
1443       sum6 += v[jrow]*x[9*idx[jrow]+5];
1444       sum7 += v[jrow]*x[9*idx[jrow]+6];
1445       sum8 += v[jrow]*x[9*idx[jrow]+7];
1446       sum9 += v[jrow]*x[9*idx[jrow]+8];
1447       jrow++;
1448     }
1449     y[9*i]   = sum1;
1450     y[9*i+1] = sum2;
1451     y[9*i+2] = sum3;
1452     y[9*i+3] = sum4;
1453     y[9*i+4] = sum5;
1454     y[9*i+5] = sum6;
1455     y[9*i+6] = sum7;
1456     y[9*i+7] = sum8;
1457     y[9*i+8] = sum9;
1458   }
1459 
1460   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1461   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1462   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 /* ------------------------------------------------------------------------------*/
1467 
1468 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1469 {
1470   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1471   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1472   const PetscScalar *x,*v;
1473   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1474   PetscErrorCode    ierr;
1475   const PetscInt    m = b->AIJ->rmap->n,*idx;
1476   PetscInt          n,i;
1477 
1478   PetscFunctionBegin;
1479   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1480   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1481   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1482 
1483   for (i=0; i<m; i++) {
1484     idx    = a->j + a->i[i];
1485     v      = a->a + a->i[i];
1486     n      = a->i[i+1] - a->i[i];
1487     alpha1 = x[9*i];
1488     alpha2 = x[9*i+1];
1489     alpha3 = x[9*i+2];
1490     alpha4 = x[9*i+3];
1491     alpha5 = x[9*i+4];
1492     alpha6 = x[9*i+5];
1493     alpha7 = x[9*i+6];
1494     alpha8 = x[9*i+7];
1495     alpha9 = x[9*i+8];
1496     while (n-->0) {
1497       y[9*(*idx)]   += alpha1*(*v);
1498       y[9*(*idx)+1] += alpha2*(*v);
1499       y[9*(*idx)+2] += alpha3*(*v);
1500       y[9*(*idx)+3] += alpha4*(*v);
1501       y[9*(*idx)+4] += alpha5*(*v);
1502       y[9*(*idx)+5] += alpha6*(*v);
1503       y[9*(*idx)+6] += alpha7*(*v);
1504       y[9*(*idx)+7] += alpha8*(*v);
1505       y[9*(*idx)+8] += alpha9*(*v);
1506       idx++; v++;
1507     }
1508   }
1509   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1510   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1511   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1516 {
1517   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1518   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1519   const PetscScalar *x,*v;
1520   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1521   PetscErrorCode    ierr;
1522   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1523   PetscInt          n,i,jrow,j;
1524 
1525   PetscFunctionBegin;
1526   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1527   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1528   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1529   idx  = a->j;
1530   v    = a->a;
1531   ii   = a->i;
1532 
1533   for (i=0; i<m; i++) {
1534     jrow = ii[i];
1535     n    = ii[i+1] - jrow;
1536     sum1 = 0.0;
1537     sum2 = 0.0;
1538     sum3 = 0.0;
1539     sum4 = 0.0;
1540     sum5 = 0.0;
1541     sum6 = 0.0;
1542     sum7 = 0.0;
1543     sum8 = 0.0;
1544     sum9 = 0.0;
1545     for (j=0; j<n; j++) {
1546       sum1 += v[jrow]*x[9*idx[jrow]];
1547       sum2 += v[jrow]*x[9*idx[jrow]+1];
1548       sum3 += v[jrow]*x[9*idx[jrow]+2];
1549       sum4 += v[jrow]*x[9*idx[jrow]+3];
1550       sum5 += v[jrow]*x[9*idx[jrow]+4];
1551       sum6 += v[jrow]*x[9*idx[jrow]+5];
1552       sum7 += v[jrow]*x[9*idx[jrow]+6];
1553       sum8 += v[jrow]*x[9*idx[jrow]+7];
1554       sum9 += v[jrow]*x[9*idx[jrow]+8];
1555       jrow++;
1556     }
1557     y[9*i]   += sum1;
1558     y[9*i+1] += sum2;
1559     y[9*i+2] += sum3;
1560     y[9*i+3] += sum4;
1561     y[9*i+4] += sum5;
1562     y[9*i+5] += sum6;
1563     y[9*i+6] += sum7;
1564     y[9*i+7] += sum8;
1565     y[9*i+8] += sum9;
1566   }
1567 
1568   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1569   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1570   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1575 {
1576   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1577   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1578   const PetscScalar *x,*v;
1579   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1580   PetscErrorCode    ierr;
1581   const PetscInt    m = b->AIJ->rmap->n,*idx;
1582   PetscInt          n,i;
1583 
1584   PetscFunctionBegin;
1585   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1586   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1587   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1588   for (i=0; i<m; i++) {
1589     idx    = a->j + a->i[i];
1590     v      = a->a + a->i[i];
1591     n      = a->i[i+1] - a->i[i];
1592     alpha1 = x[9*i];
1593     alpha2 = x[9*i+1];
1594     alpha3 = x[9*i+2];
1595     alpha4 = x[9*i+3];
1596     alpha5 = x[9*i+4];
1597     alpha6 = x[9*i+5];
1598     alpha7 = x[9*i+6];
1599     alpha8 = x[9*i+7];
1600     alpha9 = x[9*i+8];
1601     while (n-->0) {
1602       y[9*(*idx)]   += alpha1*(*v);
1603       y[9*(*idx)+1] += alpha2*(*v);
1604       y[9*(*idx)+2] += alpha3*(*v);
1605       y[9*(*idx)+3] += alpha4*(*v);
1606       y[9*(*idx)+4] += alpha5*(*v);
1607       y[9*(*idx)+5] += alpha6*(*v);
1608       y[9*(*idx)+6] += alpha7*(*v);
1609       y[9*(*idx)+7] += alpha8*(*v);
1610       y[9*(*idx)+8] += alpha9*(*v);
1611       idx++; v++;
1612     }
1613   }
1614   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1615   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1616   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1617   PetscFunctionReturn(0);
1618 }
1619 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1620 {
1621   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1622   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1623   const PetscScalar *x,*v;
1624   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1625   PetscErrorCode    ierr;
1626   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1627   PetscInt          nonzerorow=0,n,i,jrow,j;
1628 
1629   PetscFunctionBegin;
1630   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1631   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1632   idx  = a->j;
1633   v    = a->a;
1634   ii   = a->i;
1635 
1636   for (i=0; i<m; i++) {
1637     jrow  = ii[i];
1638     n     = ii[i+1] - jrow;
1639     sum1  = 0.0;
1640     sum2  = 0.0;
1641     sum3  = 0.0;
1642     sum4  = 0.0;
1643     sum5  = 0.0;
1644     sum6  = 0.0;
1645     sum7  = 0.0;
1646     sum8  = 0.0;
1647     sum9  = 0.0;
1648     sum10 = 0.0;
1649 
1650     nonzerorow += (n>0);
1651     for (j=0; j<n; j++) {
1652       sum1  += v[jrow]*x[10*idx[jrow]];
1653       sum2  += v[jrow]*x[10*idx[jrow]+1];
1654       sum3  += v[jrow]*x[10*idx[jrow]+2];
1655       sum4  += v[jrow]*x[10*idx[jrow]+3];
1656       sum5  += v[jrow]*x[10*idx[jrow]+4];
1657       sum6  += v[jrow]*x[10*idx[jrow]+5];
1658       sum7  += v[jrow]*x[10*idx[jrow]+6];
1659       sum8  += v[jrow]*x[10*idx[jrow]+7];
1660       sum9  += v[jrow]*x[10*idx[jrow]+8];
1661       sum10 += v[jrow]*x[10*idx[jrow]+9];
1662       jrow++;
1663     }
1664     y[10*i]   = sum1;
1665     y[10*i+1] = sum2;
1666     y[10*i+2] = sum3;
1667     y[10*i+3] = sum4;
1668     y[10*i+4] = sum5;
1669     y[10*i+5] = sum6;
1670     y[10*i+6] = sum7;
1671     y[10*i+7] = sum8;
1672     y[10*i+8] = sum9;
1673     y[10*i+9] = sum10;
1674   }
1675 
1676   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1677   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1678   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1683 {
1684   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1685   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1686   const PetscScalar *x,*v;
1687   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1688   PetscErrorCode    ierr;
1689   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1690   PetscInt          n,i,jrow,j;
1691 
1692   PetscFunctionBegin;
1693   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1694   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1695   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1696   idx  = a->j;
1697   v    = a->a;
1698   ii   = a->i;
1699 
1700   for (i=0; i<m; i++) {
1701     jrow  = ii[i];
1702     n     = ii[i+1] - jrow;
1703     sum1  = 0.0;
1704     sum2  = 0.0;
1705     sum3  = 0.0;
1706     sum4  = 0.0;
1707     sum5  = 0.0;
1708     sum6  = 0.0;
1709     sum7  = 0.0;
1710     sum8  = 0.0;
1711     sum9  = 0.0;
1712     sum10 = 0.0;
1713     for (j=0; j<n; j++) {
1714       sum1  += v[jrow]*x[10*idx[jrow]];
1715       sum2  += v[jrow]*x[10*idx[jrow]+1];
1716       sum3  += v[jrow]*x[10*idx[jrow]+2];
1717       sum4  += v[jrow]*x[10*idx[jrow]+3];
1718       sum5  += v[jrow]*x[10*idx[jrow]+4];
1719       sum6  += v[jrow]*x[10*idx[jrow]+5];
1720       sum7  += v[jrow]*x[10*idx[jrow]+6];
1721       sum8  += v[jrow]*x[10*idx[jrow]+7];
1722       sum9  += v[jrow]*x[10*idx[jrow]+8];
1723       sum10 += v[jrow]*x[10*idx[jrow]+9];
1724       jrow++;
1725     }
1726     y[10*i]   += sum1;
1727     y[10*i+1] += sum2;
1728     y[10*i+2] += sum3;
1729     y[10*i+3] += sum4;
1730     y[10*i+4] += sum5;
1731     y[10*i+5] += sum6;
1732     y[10*i+6] += sum7;
1733     y[10*i+7] += sum8;
1734     y[10*i+8] += sum9;
1735     y[10*i+9] += sum10;
1736   }
1737 
1738   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1739   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1740   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1745 {
1746   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1747   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1748   const PetscScalar *x,*v;
1749   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1750   PetscErrorCode    ierr;
1751   const PetscInt    m = b->AIJ->rmap->n,*idx;
1752   PetscInt          n,i;
1753 
1754   PetscFunctionBegin;
1755   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1756   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1757   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1758 
1759   for (i=0; i<m; i++) {
1760     idx     = a->j + a->i[i];
1761     v       = a->a + a->i[i];
1762     n       = a->i[i+1] - a->i[i];
1763     alpha1  = x[10*i];
1764     alpha2  = x[10*i+1];
1765     alpha3  = x[10*i+2];
1766     alpha4  = x[10*i+3];
1767     alpha5  = x[10*i+4];
1768     alpha6  = x[10*i+5];
1769     alpha7  = x[10*i+6];
1770     alpha8  = x[10*i+7];
1771     alpha9  = x[10*i+8];
1772     alpha10 = x[10*i+9];
1773     while (n-->0) {
1774       y[10*(*idx)]   += alpha1*(*v);
1775       y[10*(*idx)+1] += alpha2*(*v);
1776       y[10*(*idx)+2] += alpha3*(*v);
1777       y[10*(*idx)+3] += alpha4*(*v);
1778       y[10*(*idx)+4] += alpha5*(*v);
1779       y[10*(*idx)+5] += alpha6*(*v);
1780       y[10*(*idx)+6] += alpha7*(*v);
1781       y[10*(*idx)+7] += alpha8*(*v);
1782       y[10*(*idx)+8] += alpha9*(*v);
1783       y[10*(*idx)+9] += alpha10*(*v);
1784       idx++; v++;
1785     }
1786   }
1787   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1788   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1789   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1794 {
1795   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1796   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1797   const PetscScalar *x,*v;
1798   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1799   PetscErrorCode    ierr;
1800   const PetscInt    m = b->AIJ->rmap->n,*idx;
1801   PetscInt          n,i;
1802 
1803   PetscFunctionBegin;
1804   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1805   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1806   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1807   for (i=0; i<m; i++) {
1808     idx     = a->j + a->i[i];
1809     v       = a->a + a->i[i];
1810     n       = a->i[i+1] - a->i[i];
1811     alpha1  = x[10*i];
1812     alpha2  = x[10*i+1];
1813     alpha3  = x[10*i+2];
1814     alpha4  = x[10*i+3];
1815     alpha5  = x[10*i+4];
1816     alpha6  = x[10*i+5];
1817     alpha7  = x[10*i+6];
1818     alpha8  = x[10*i+7];
1819     alpha9  = x[10*i+8];
1820     alpha10 = x[10*i+9];
1821     while (n-->0) {
1822       y[10*(*idx)]   += alpha1*(*v);
1823       y[10*(*idx)+1] += alpha2*(*v);
1824       y[10*(*idx)+2] += alpha3*(*v);
1825       y[10*(*idx)+3] += alpha4*(*v);
1826       y[10*(*idx)+4] += alpha5*(*v);
1827       y[10*(*idx)+5] += alpha6*(*v);
1828       y[10*(*idx)+6] += alpha7*(*v);
1829       y[10*(*idx)+7] += alpha8*(*v);
1830       y[10*(*idx)+8] += alpha9*(*v);
1831       y[10*(*idx)+9] += alpha10*(*v);
1832       idx++; v++;
1833     }
1834   }
1835   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1836   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1837   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 /*--------------------------------------------------------------------------------------------*/
1842 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1843 {
1844   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1845   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1846   const PetscScalar *x,*v;
1847   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1848   PetscErrorCode    ierr;
1849   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1850   PetscInt          nonzerorow=0,n,i,jrow,j;
1851 
1852   PetscFunctionBegin;
1853   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1854   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1855   idx  = a->j;
1856   v    = a->a;
1857   ii   = a->i;
1858 
1859   for (i=0; i<m; i++) {
1860     jrow  = ii[i];
1861     n     = ii[i+1] - jrow;
1862     sum1  = 0.0;
1863     sum2  = 0.0;
1864     sum3  = 0.0;
1865     sum4  = 0.0;
1866     sum5  = 0.0;
1867     sum6  = 0.0;
1868     sum7  = 0.0;
1869     sum8  = 0.0;
1870     sum9  = 0.0;
1871     sum10 = 0.0;
1872     sum11 = 0.0;
1873 
1874     nonzerorow += (n>0);
1875     for (j=0; j<n; j++) {
1876       sum1  += v[jrow]*x[11*idx[jrow]];
1877       sum2  += v[jrow]*x[11*idx[jrow]+1];
1878       sum3  += v[jrow]*x[11*idx[jrow]+2];
1879       sum4  += v[jrow]*x[11*idx[jrow]+3];
1880       sum5  += v[jrow]*x[11*idx[jrow]+4];
1881       sum6  += v[jrow]*x[11*idx[jrow]+5];
1882       sum7  += v[jrow]*x[11*idx[jrow]+6];
1883       sum8  += v[jrow]*x[11*idx[jrow]+7];
1884       sum9  += v[jrow]*x[11*idx[jrow]+8];
1885       sum10 += v[jrow]*x[11*idx[jrow]+9];
1886       sum11 += v[jrow]*x[11*idx[jrow]+10];
1887       jrow++;
1888     }
1889     y[11*i]    = sum1;
1890     y[11*i+1]  = sum2;
1891     y[11*i+2]  = sum3;
1892     y[11*i+3]  = sum4;
1893     y[11*i+4]  = sum5;
1894     y[11*i+5]  = sum6;
1895     y[11*i+6]  = sum7;
1896     y[11*i+7]  = sum8;
1897     y[11*i+8]  = sum9;
1898     y[11*i+9]  = sum10;
1899     y[11*i+10] = sum11;
1900   }
1901 
1902   ierr = PetscLogFlops(22.0*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1903   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1904   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1909 {
1910   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1911   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1912   const PetscScalar *x,*v;
1913   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1914   PetscErrorCode    ierr;
1915   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1916   PetscInt          n,i,jrow,j;
1917 
1918   PetscFunctionBegin;
1919   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1920   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1921   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1922   idx  = a->j;
1923   v    = a->a;
1924   ii   = a->i;
1925 
1926   for (i=0; i<m; i++) {
1927     jrow  = ii[i];
1928     n     = ii[i+1] - jrow;
1929     sum1  = 0.0;
1930     sum2  = 0.0;
1931     sum3  = 0.0;
1932     sum4  = 0.0;
1933     sum5  = 0.0;
1934     sum6  = 0.0;
1935     sum7  = 0.0;
1936     sum8  = 0.0;
1937     sum9  = 0.0;
1938     sum10 = 0.0;
1939     sum11 = 0.0;
1940     for (j=0; j<n; j++) {
1941       sum1  += v[jrow]*x[11*idx[jrow]];
1942       sum2  += v[jrow]*x[11*idx[jrow]+1];
1943       sum3  += v[jrow]*x[11*idx[jrow]+2];
1944       sum4  += v[jrow]*x[11*idx[jrow]+3];
1945       sum5  += v[jrow]*x[11*idx[jrow]+4];
1946       sum6  += v[jrow]*x[11*idx[jrow]+5];
1947       sum7  += v[jrow]*x[11*idx[jrow]+6];
1948       sum8  += v[jrow]*x[11*idx[jrow]+7];
1949       sum9  += v[jrow]*x[11*idx[jrow]+8];
1950       sum10 += v[jrow]*x[11*idx[jrow]+9];
1951       sum11 += v[jrow]*x[11*idx[jrow]+10];
1952       jrow++;
1953     }
1954     y[11*i]    += sum1;
1955     y[11*i+1]  += sum2;
1956     y[11*i+2]  += sum3;
1957     y[11*i+3]  += sum4;
1958     y[11*i+4]  += sum5;
1959     y[11*i+5]  += sum6;
1960     y[11*i+6]  += sum7;
1961     y[11*i+7]  += sum8;
1962     y[11*i+8]  += sum9;
1963     y[11*i+9]  += sum10;
1964     y[11*i+10] += sum11;
1965   }
1966 
1967   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
1968   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1969   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1974 {
1975   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1976   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1977   const PetscScalar *x,*v;
1978   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1979   PetscErrorCode    ierr;
1980   const PetscInt    m = b->AIJ->rmap->n,*idx;
1981   PetscInt          n,i;
1982 
1983   PetscFunctionBegin;
1984   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1985   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1986   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1987 
1988   for (i=0; i<m; i++) {
1989     idx     = a->j + a->i[i];
1990     v       = a->a + a->i[i];
1991     n       = a->i[i+1] - a->i[i];
1992     alpha1  = x[11*i];
1993     alpha2  = x[11*i+1];
1994     alpha3  = x[11*i+2];
1995     alpha4  = x[11*i+3];
1996     alpha5  = x[11*i+4];
1997     alpha6  = x[11*i+5];
1998     alpha7  = x[11*i+6];
1999     alpha8  = x[11*i+7];
2000     alpha9  = x[11*i+8];
2001     alpha10 = x[11*i+9];
2002     alpha11 = x[11*i+10];
2003     while (n-->0) {
2004       y[11*(*idx)]    += alpha1*(*v);
2005       y[11*(*idx)+1]  += alpha2*(*v);
2006       y[11*(*idx)+2]  += alpha3*(*v);
2007       y[11*(*idx)+3]  += alpha4*(*v);
2008       y[11*(*idx)+4]  += alpha5*(*v);
2009       y[11*(*idx)+5]  += alpha6*(*v);
2010       y[11*(*idx)+6]  += alpha7*(*v);
2011       y[11*(*idx)+7]  += alpha8*(*v);
2012       y[11*(*idx)+8]  += alpha9*(*v);
2013       y[11*(*idx)+9]  += alpha10*(*v);
2014       y[11*(*idx)+10] += alpha11*(*v);
2015       idx++; v++;
2016     }
2017   }
2018   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
2019   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2020   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2025 {
2026   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2027   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2028   const PetscScalar *x,*v;
2029   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2030   PetscErrorCode    ierr;
2031   const PetscInt    m = b->AIJ->rmap->n,*idx;
2032   PetscInt          n,i;
2033 
2034   PetscFunctionBegin;
2035   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2036   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2037   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2038   for (i=0; i<m; i++) {
2039     idx     = a->j + a->i[i];
2040     v       = a->a + a->i[i];
2041     n       = a->i[i+1] - a->i[i];
2042     alpha1  = x[11*i];
2043     alpha2  = x[11*i+1];
2044     alpha3  = x[11*i+2];
2045     alpha4  = x[11*i+3];
2046     alpha5  = x[11*i+4];
2047     alpha6  = x[11*i+5];
2048     alpha7  = x[11*i+6];
2049     alpha8  = x[11*i+7];
2050     alpha9  = x[11*i+8];
2051     alpha10 = x[11*i+9];
2052     alpha11 = x[11*i+10];
2053     while (n-->0) {
2054       y[11*(*idx)]    += alpha1*(*v);
2055       y[11*(*idx)+1]  += alpha2*(*v);
2056       y[11*(*idx)+2]  += alpha3*(*v);
2057       y[11*(*idx)+3]  += alpha4*(*v);
2058       y[11*(*idx)+4]  += alpha5*(*v);
2059       y[11*(*idx)+5]  += alpha6*(*v);
2060       y[11*(*idx)+6]  += alpha7*(*v);
2061       y[11*(*idx)+7]  += alpha8*(*v);
2062       y[11*(*idx)+8]  += alpha9*(*v);
2063       y[11*(*idx)+9]  += alpha10*(*v);
2064       y[11*(*idx)+10] += alpha11*(*v);
2065       idx++; v++;
2066     }
2067   }
2068   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
2069   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2070   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 /*--------------------------------------------------------------------------------------------*/
2075 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2076 {
2077   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2078   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2079   const PetscScalar *x,*v;
2080   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2081   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2082   PetscErrorCode    ierr;
2083   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2084   PetscInt          nonzerorow=0,n,i,jrow,j;
2085 
2086   PetscFunctionBegin;
2087   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2088   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2089   idx  = a->j;
2090   v    = a->a;
2091   ii   = a->i;
2092 
2093   for (i=0; i<m; i++) {
2094     jrow  = ii[i];
2095     n     = ii[i+1] - jrow;
2096     sum1  = 0.0;
2097     sum2  = 0.0;
2098     sum3  = 0.0;
2099     sum4  = 0.0;
2100     sum5  = 0.0;
2101     sum6  = 0.0;
2102     sum7  = 0.0;
2103     sum8  = 0.0;
2104     sum9  = 0.0;
2105     sum10 = 0.0;
2106     sum11 = 0.0;
2107     sum12 = 0.0;
2108     sum13 = 0.0;
2109     sum14 = 0.0;
2110     sum15 = 0.0;
2111     sum16 = 0.0;
2112 
2113     nonzerorow += (n>0);
2114     for (j=0; j<n; j++) {
2115       sum1  += v[jrow]*x[16*idx[jrow]];
2116       sum2  += v[jrow]*x[16*idx[jrow]+1];
2117       sum3  += v[jrow]*x[16*idx[jrow]+2];
2118       sum4  += v[jrow]*x[16*idx[jrow]+3];
2119       sum5  += v[jrow]*x[16*idx[jrow]+4];
2120       sum6  += v[jrow]*x[16*idx[jrow]+5];
2121       sum7  += v[jrow]*x[16*idx[jrow]+6];
2122       sum8  += v[jrow]*x[16*idx[jrow]+7];
2123       sum9  += v[jrow]*x[16*idx[jrow]+8];
2124       sum10 += v[jrow]*x[16*idx[jrow]+9];
2125       sum11 += v[jrow]*x[16*idx[jrow]+10];
2126       sum12 += v[jrow]*x[16*idx[jrow]+11];
2127       sum13 += v[jrow]*x[16*idx[jrow]+12];
2128       sum14 += v[jrow]*x[16*idx[jrow]+13];
2129       sum15 += v[jrow]*x[16*idx[jrow]+14];
2130       sum16 += v[jrow]*x[16*idx[jrow]+15];
2131       jrow++;
2132     }
2133     y[16*i]    = sum1;
2134     y[16*i+1]  = sum2;
2135     y[16*i+2]  = sum3;
2136     y[16*i+3]  = sum4;
2137     y[16*i+4]  = sum5;
2138     y[16*i+5]  = sum6;
2139     y[16*i+6]  = sum7;
2140     y[16*i+7]  = sum8;
2141     y[16*i+8]  = sum9;
2142     y[16*i+9]  = sum10;
2143     y[16*i+10] = sum11;
2144     y[16*i+11] = sum12;
2145     y[16*i+12] = sum13;
2146     y[16*i+13] = sum14;
2147     y[16*i+14] = sum15;
2148     y[16*i+15] = sum16;
2149   }
2150 
2151   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2152   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2153   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2158 {
2159   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2160   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2161   const PetscScalar *x,*v;
2162   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2163   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2164   PetscErrorCode    ierr;
2165   const PetscInt    m = b->AIJ->rmap->n,*idx;
2166   PetscInt          n,i;
2167 
2168   PetscFunctionBegin;
2169   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2170   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2171   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2172 
2173   for (i=0; i<m; i++) {
2174     idx     = a->j + a->i[i];
2175     v       = a->a + a->i[i];
2176     n       = a->i[i+1] - a->i[i];
2177     alpha1  = x[16*i];
2178     alpha2  = x[16*i+1];
2179     alpha3  = x[16*i+2];
2180     alpha4  = x[16*i+3];
2181     alpha5  = x[16*i+4];
2182     alpha6  = x[16*i+5];
2183     alpha7  = x[16*i+6];
2184     alpha8  = x[16*i+7];
2185     alpha9  = x[16*i+8];
2186     alpha10 = x[16*i+9];
2187     alpha11 = x[16*i+10];
2188     alpha12 = x[16*i+11];
2189     alpha13 = x[16*i+12];
2190     alpha14 = x[16*i+13];
2191     alpha15 = x[16*i+14];
2192     alpha16 = x[16*i+15];
2193     while (n-->0) {
2194       y[16*(*idx)]    += alpha1*(*v);
2195       y[16*(*idx)+1]  += alpha2*(*v);
2196       y[16*(*idx)+2]  += alpha3*(*v);
2197       y[16*(*idx)+3]  += alpha4*(*v);
2198       y[16*(*idx)+4]  += alpha5*(*v);
2199       y[16*(*idx)+5]  += alpha6*(*v);
2200       y[16*(*idx)+6]  += alpha7*(*v);
2201       y[16*(*idx)+7]  += alpha8*(*v);
2202       y[16*(*idx)+8]  += alpha9*(*v);
2203       y[16*(*idx)+9]  += alpha10*(*v);
2204       y[16*(*idx)+10] += alpha11*(*v);
2205       y[16*(*idx)+11] += alpha12*(*v);
2206       y[16*(*idx)+12] += alpha13*(*v);
2207       y[16*(*idx)+13] += alpha14*(*v);
2208       y[16*(*idx)+14] += alpha15*(*v);
2209       y[16*(*idx)+15] += alpha16*(*v);
2210       idx++; v++;
2211     }
2212   }
2213   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2214   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2215   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2220 {
2221   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2222   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2223   const PetscScalar *x,*v;
2224   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2225   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2226   PetscErrorCode    ierr;
2227   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2228   PetscInt          n,i,jrow,j;
2229 
2230   PetscFunctionBegin;
2231   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2232   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2233   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2234   idx  = a->j;
2235   v    = a->a;
2236   ii   = a->i;
2237 
2238   for (i=0; i<m; i++) {
2239     jrow  = ii[i];
2240     n     = ii[i+1] - jrow;
2241     sum1  = 0.0;
2242     sum2  = 0.0;
2243     sum3  = 0.0;
2244     sum4  = 0.0;
2245     sum5  = 0.0;
2246     sum6  = 0.0;
2247     sum7  = 0.0;
2248     sum8  = 0.0;
2249     sum9  = 0.0;
2250     sum10 = 0.0;
2251     sum11 = 0.0;
2252     sum12 = 0.0;
2253     sum13 = 0.0;
2254     sum14 = 0.0;
2255     sum15 = 0.0;
2256     sum16 = 0.0;
2257     for (j=0; j<n; j++) {
2258       sum1  += v[jrow]*x[16*idx[jrow]];
2259       sum2  += v[jrow]*x[16*idx[jrow]+1];
2260       sum3  += v[jrow]*x[16*idx[jrow]+2];
2261       sum4  += v[jrow]*x[16*idx[jrow]+3];
2262       sum5  += v[jrow]*x[16*idx[jrow]+4];
2263       sum6  += v[jrow]*x[16*idx[jrow]+5];
2264       sum7  += v[jrow]*x[16*idx[jrow]+6];
2265       sum8  += v[jrow]*x[16*idx[jrow]+7];
2266       sum9  += v[jrow]*x[16*idx[jrow]+8];
2267       sum10 += v[jrow]*x[16*idx[jrow]+9];
2268       sum11 += v[jrow]*x[16*idx[jrow]+10];
2269       sum12 += v[jrow]*x[16*idx[jrow]+11];
2270       sum13 += v[jrow]*x[16*idx[jrow]+12];
2271       sum14 += v[jrow]*x[16*idx[jrow]+13];
2272       sum15 += v[jrow]*x[16*idx[jrow]+14];
2273       sum16 += v[jrow]*x[16*idx[jrow]+15];
2274       jrow++;
2275     }
2276     y[16*i]    += sum1;
2277     y[16*i+1]  += sum2;
2278     y[16*i+2]  += sum3;
2279     y[16*i+3]  += sum4;
2280     y[16*i+4]  += sum5;
2281     y[16*i+5]  += sum6;
2282     y[16*i+6]  += sum7;
2283     y[16*i+7]  += sum8;
2284     y[16*i+8]  += sum9;
2285     y[16*i+9]  += sum10;
2286     y[16*i+10] += sum11;
2287     y[16*i+11] += sum12;
2288     y[16*i+12] += sum13;
2289     y[16*i+13] += sum14;
2290     y[16*i+14] += sum15;
2291     y[16*i+15] += sum16;
2292   }
2293 
2294   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2295   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2296   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2301 {
2302   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2303   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2304   const PetscScalar *x,*v;
2305   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2306   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2307   PetscErrorCode    ierr;
2308   const PetscInt    m = b->AIJ->rmap->n,*idx;
2309   PetscInt          n,i;
2310 
2311   PetscFunctionBegin;
2312   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2313   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2314   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2315   for (i=0; i<m; i++) {
2316     idx     = a->j + a->i[i];
2317     v       = a->a + a->i[i];
2318     n       = a->i[i+1] - a->i[i];
2319     alpha1  = x[16*i];
2320     alpha2  = x[16*i+1];
2321     alpha3  = x[16*i+2];
2322     alpha4  = x[16*i+3];
2323     alpha5  = x[16*i+4];
2324     alpha6  = x[16*i+5];
2325     alpha7  = x[16*i+6];
2326     alpha8  = x[16*i+7];
2327     alpha9  = x[16*i+8];
2328     alpha10 = x[16*i+9];
2329     alpha11 = x[16*i+10];
2330     alpha12 = x[16*i+11];
2331     alpha13 = x[16*i+12];
2332     alpha14 = x[16*i+13];
2333     alpha15 = x[16*i+14];
2334     alpha16 = x[16*i+15];
2335     while (n-->0) {
2336       y[16*(*idx)]    += alpha1*(*v);
2337       y[16*(*idx)+1]  += alpha2*(*v);
2338       y[16*(*idx)+2]  += alpha3*(*v);
2339       y[16*(*idx)+3]  += alpha4*(*v);
2340       y[16*(*idx)+4]  += alpha5*(*v);
2341       y[16*(*idx)+5]  += alpha6*(*v);
2342       y[16*(*idx)+6]  += alpha7*(*v);
2343       y[16*(*idx)+7]  += alpha8*(*v);
2344       y[16*(*idx)+8]  += alpha9*(*v);
2345       y[16*(*idx)+9]  += alpha10*(*v);
2346       y[16*(*idx)+10] += alpha11*(*v);
2347       y[16*(*idx)+11] += alpha12*(*v);
2348       y[16*(*idx)+12] += alpha13*(*v);
2349       y[16*(*idx)+13] += alpha14*(*v);
2350       y[16*(*idx)+14] += alpha15*(*v);
2351       y[16*(*idx)+15] += alpha16*(*v);
2352       idx++; v++;
2353     }
2354   }
2355   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2356   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2357   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 /*--------------------------------------------------------------------------------------------*/
2362 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2363 {
2364   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2365   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2366   const PetscScalar *x,*v;
2367   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2368   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2369   PetscErrorCode    ierr;
2370   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2371   PetscInt          nonzerorow=0,n,i,jrow,j;
2372 
2373   PetscFunctionBegin;
2374   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2375   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2376   idx  = a->j;
2377   v    = a->a;
2378   ii   = a->i;
2379 
2380   for (i=0; i<m; i++) {
2381     jrow  = ii[i];
2382     n     = ii[i+1] - jrow;
2383     sum1  = 0.0;
2384     sum2  = 0.0;
2385     sum3  = 0.0;
2386     sum4  = 0.0;
2387     sum5  = 0.0;
2388     sum6  = 0.0;
2389     sum7  = 0.0;
2390     sum8  = 0.0;
2391     sum9  = 0.0;
2392     sum10 = 0.0;
2393     sum11 = 0.0;
2394     sum12 = 0.0;
2395     sum13 = 0.0;
2396     sum14 = 0.0;
2397     sum15 = 0.0;
2398     sum16 = 0.0;
2399     sum17 = 0.0;
2400     sum18 = 0.0;
2401 
2402     nonzerorow += (n>0);
2403     for (j=0; j<n; j++) {
2404       sum1  += v[jrow]*x[18*idx[jrow]];
2405       sum2  += v[jrow]*x[18*idx[jrow]+1];
2406       sum3  += v[jrow]*x[18*idx[jrow]+2];
2407       sum4  += v[jrow]*x[18*idx[jrow]+3];
2408       sum5  += v[jrow]*x[18*idx[jrow]+4];
2409       sum6  += v[jrow]*x[18*idx[jrow]+5];
2410       sum7  += v[jrow]*x[18*idx[jrow]+6];
2411       sum8  += v[jrow]*x[18*idx[jrow]+7];
2412       sum9  += v[jrow]*x[18*idx[jrow]+8];
2413       sum10 += v[jrow]*x[18*idx[jrow]+9];
2414       sum11 += v[jrow]*x[18*idx[jrow]+10];
2415       sum12 += v[jrow]*x[18*idx[jrow]+11];
2416       sum13 += v[jrow]*x[18*idx[jrow]+12];
2417       sum14 += v[jrow]*x[18*idx[jrow]+13];
2418       sum15 += v[jrow]*x[18*idx[jrow]+14];
2419       sum16 += v[jrow]*x[18*idx[jrow]+15];
2420       sum17 += v[jrow]*x[18*idx[jrow]+16];
2421       sum18 += v[jrow]*x[18*idx[jrow]+17];
2422       jrow++;
2423     }
2424     y[18*i]    = sum1;
2425     y[18*i+1]  = sum2;
2426     y[18*i+2]  = sum3;
2427     y[18*i+3]  = sum4;
2428     y[18*i+4]  = sum5;
2429     y[18*i+5]  = sum6;
2430     y[18*i+6]  = sum7;
2431     y[18*i+7]  = sum8;
2432     y[18*i+8]  = sum9;
2433     y[18*i+9]  = sum10;
2434     y[18*i+10] = sum11;
2435     y[18*i+11] = sum12;
2436     y[18*i+12] = sum13;
2437     y[18*i+13] = sum14;
2438     y[18*i+14] = sum15;
2439     y[18*i+15] = sum16;
2440     y[18*i+16] = sum17;
2441     y[18*i+17] = sum18;
2442   }
2443 
2444   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2445   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2446   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451 {
2452   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2453   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2454   const PetscScalar *x,*v;
2455   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2456   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2457   PetscErrorCode    ierr;
2458   const PetscInt    m = b->AIJ->rmap->n,*idx;
2459   PetscInt          n,i;
2460 
2461   PetscFunctionBegin;
2462   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2463   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2464   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2465 
2466   for (i=0; i<m; i++) {
2467     idx     = a->j + a->i[i];
2468     v       = a->a + a->i[i];
2469     n       = a->i[i+1] - a->i[i];
2470     alpha1  = x[18*i];
2471     alpha2  = x[18*i+1];
2472     alpha3  = x[18*i+2];
2473     alpha4  = x[18*i+3];
2474     alpha5  = x[18*i+4];
2475     alpha6  = x[18*i+5];
2476     alpha7  = x[18*i+6];
2477     alpha8  = x[18*i+7];
2478     alpha9  = x[18*i+8];
2479     alpha10 = x[18*i+9];
2480     alpha11 = x[18*i+10];
2481     alpha12 = x[18*i+11];
2482     alpha13 = x[18*i+12];
2483     alpha14 = x[18*i+13];
2484     alpha15 = x[18*i+14];
2485     alpha16 = x[18*i+15];
2486     alpha17 = x[18*i+16];
2487     alpha18 = x[18*i+17];
2488     while (n-->0) {
2489       y[18*(*idx)]    += alpha1*(*v);
2490       y[18*(*idx)+1]  += alpha2*(*v);
2491       y[18*(*idx)+2]  += alpha3*(*v);
2492       y[18*(*idx)+3]  += alpha4*(*v);
2493       y[18*(*idx)+4]  += alpha5*(*v);
2494       y[18*(*idx)+5]  += alpha6*(*v);
2495       y[18*(*idx)+6]  += alpha7*(*v);
2496       y[18*(*idx)+7]  += alpha8*(*v);
2497       y[18*(*idx)+8]  += alpha9*(*v);
2498       y[18*(*idx)+9]  += alpha10*(*v);
2499       y[18*(*idx)+10] += alpha11*(*v);
2500       y[18*(*idx)+11] += alpha12*(*v);
2501       y[18*(*idx)+12] += alpha13*(*v);
2502       y[18*(*idx)+13] += alpha14*(*v);
2503       y[18*(*idx)+14] += alpha15*(*v);
2504       y[18*(*idx)+15] += alpha16*(*v);
2505       y[18*(*idx)+16] += alpha17*(*v);
2506       y[18*(*idx)+17] += alpha18*(*v);
2507       idx++; v++;
2508     }
2509   }
2510   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2511   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2512   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2517 {
2518   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2519   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2520   const PetscScalar *x,*v;
2521   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2522   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2523   PetscErrorCode    ierr;
2524   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2525   PetscInt          n,i,jrow,j;
2526 
2527   PetscFunctionBegin;
2528   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2529   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2530   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2531   idx  = a->j;
2532   v    = a->a;
2533   ii   = a->i;
2534 
2535   for (i=0; i<m; i++) {
2536     jrow  = ii[i];
2537     n     = ii[i+1] - jrow;
2538     sum1  = 0.0;
2539     sum2  = 0.0;
2540     sum3  = 0.0;
2541     sum4  = 0.0;
2542     sum5  = 0.0;
2543     sum6  = 0.0;
2544     sum7  = 0.0;
2545     sum8  = 0.0;
2546     sum9  = 0.0;
2547     sum10 = 0.0;
2548     sum11 = 0.0;
2549     sum12 = 0.0;
2550     sum13 = 0.0;
2551     sum14 = 0.0;
2552     sum15 = 0.0;
2553     sum16 = 0.0;
2554     sum17 = 0.0;
2555     sum18 = 0.0;
2556     for (j=0; j<n; j++) {
2557       sum1  += v[jrow]*x[18*idx[jrow]];
2558       sum2  += v[jrow]*x[18*idx[jrow]+1];
2559       sum3  += v[jrow]*x[18*idx[jrow]+2];
2560       sum4  += v[jrow]*x[18*idx[jrow]+3];
2561       sum5  += v[jrow]*x[18*idx[jrow]+4];
2562       sum6  += v[jrow]*x[18*idx[jrow]+5];
2563       sum7  += v[jrow]*x[18*idx[jrow]+6];
2564       sum8  += v[jrow]*x[18*idx[jrow]+7];
2565       sum9  += v[jrow]*x[18*idx[jrow]+8];
2566       sum10 += v[jrow]*x[18*idx[jrow]+9];
2567       sum11 += v[jrow]*x[18*idx[jrow]+10];
2568       sum12 += v[jrow]*x[18*idx[jrow]+11];
2569       sum13 += v[jrow]*x[18*idx[jrow]+12];
2570       sum14 += v[jrow]*x[18*idx[jrow]+13];
2571       sum15 += v[jrow]*x[18*idx[jrow]+14];
2572       sum16 += v[jrow]*x[18*idx[jrow]+15];
2573       sum17 += v[jrow]*x[18*idx[jrow]+16];
2574       sum18 += v[jrow]*x[18*idx[jrow]+17];
2575       jrow++;
2576     }
2577     y[18*i]    += sum1;
2578     y[18*i+1]  += sum2;
2579     y[18*i+2]  += sum3;
2580     y[18*i+3]  += sum4;
2581     y[18*i+4]  += sum5;
2582     y[18*i+5]  += sum6;
2583     y[18*i+6]  += sum7;
2584     y[18*i+7]  += sum8;
2585     y[18*i+8]  += sum9;
2586     y[18*i+9]  += sum10;
2587     y[18*i+10] += sum11;
2588     y[18*i+11] += sum12;
2589     y[18*i+12] += sum13;
2590     y[18*i+13] += sum14;
2591     y[18*i+14] += sum15;
2592     y[18*i+15] += sum16;
2593     y[18*i+16] += sum17;
2594     y[18*i+17] += sum18;
2595   }
2596 
2597   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2598   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2599   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2604 {
2605   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2606   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2607   const PetscScalar *x,*v;
2608   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2609   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2610   PetscErrorCode    ierr;
2611   const PetscInt    m = b->AIJ->rmap->n,*idx;
2612   PetscInt          n,i;
2613 
2614   PetscFunctionBegin;
2615   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2616   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2617   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2618   for (i=0; i<m; i++) {
2619     idx     = a->j + a->i[i];
2620     v       = a->a + a->i[i];
2621     n       = a->i[i+1] - a->i[i];
2622     alpha1  = x[18*i];
2623     alpha2  = x[18*i+1];
2624     alpha3  = x[18*i+2];
2625     alpha4  = x[18*i+3];
2626     alpha5  = x[18*i+4];
2627     alpha6  = x[18*i+5];
2628     alpha7  = x[18*i+6];
2629     alpha8  = x[18*i+7];
2630     alpha9  = x[18*i+8];
2631     alpha10 = x[18*i+9];
2632     alpha11 = x[18*i+10];
2633     alpha12 = x[18*i+11];
2634     alpha13 = x[18*i+12];
2635     alpha14 = x[18*i+13];
2636     alpha15 = x[18*i+14];
2637     alpha16 = x[18*i+15];
2638     alpha17 = x[18*i+16];
2639     alpha18 = x[18*i+17];
2640     while (n-->0) {
2641       y[18*(*idx)]    += alpha1*(*v);
2642       y[18*(*idx)+1]  += alpha2*(*v);
2643       y[18*(*idx)+2]  += alpha3*(*v);
2644       y[18*(*idx)+3]  += alpha4*(*v);
2645       y[18*(*idx)+4]  += alpha5*(*v);
2646       y[18*(*idx)+5]  += alpha6*(*v);
2647       y[18*(*idx)+6]  += alpha7*(*v);
2648       y[18*(*idx)+7]  += alpha8*(*v);
2649       y[18*(*idx)+8]  += alpha9*(*v);
2650       y[18*(*idx)+9]  += alpha10*(*v);
2651       y[18*(*idx)+10] += alpha11*(*v);
2652       y[18*(*idx)+11] += alpha12*(*v);
2653       y[18*(*idx)+12] += alpha13*(*v);
2654       y[18*(*idx)+13] += alpha14*(*v);
2655       y[18*(*idx)+14] += alpha15*(*v);
2656       y[18*(*idx)+15] += alpha16*(*v);
2657       y[18*(*idx)+16] += alpha17*(*v);
2658       y[18*(*idx)+17] += alpha18*(*v);
2659       idx++; v++;
2660     }
2661   }
2662   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2663   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2664   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2669 {
2670   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2671   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2672   const PetscScalar *x,*v;
2673   PetscScalar       *y,*sums;
2674   PetscErrorCode    ierr;
2675   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2676   PetscInt          n,i,jrow,j,dof = b->dof,k;
2677 
2678   PetscFunctionBegin;
2679   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2680   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2681   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2682   idx  = a->j;
2683   v    = a->a;
2684   ii   = a->i;
2685 
2686   for (i=0; i<m; i++) {
2687     jrow = ii[i];
2688     n    = ii[i+1] - jrow;
2689     sums = y + dof*i;
2690     for (j=0; j<n; j++) {
2691       for (k=0; k<dof; k++) {
2692         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2693       }
2694       jrow++;
2695     }
2696   }
2697 
2698   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2699   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2700   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2705 {
2706   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2707   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2708   const PetscScalar *x,*v;
2709   PetscScalar       *y,*sums;
2710   PetscErrorCode    ierr;
2711   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2712   PetscInt          n,i,jrow,j,dof = b->dof,k;
2713 
2714   PetscFunctionBegin;
2715   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2716   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2717   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2718   idx  = a->j;
2719   v    = a->a;
2720   ii   = a->i;
2721 
2722   for (i=0; i<m; i++) {
2723     jrow = ii[i];
2724     n    = ii[i+1] - jrow;
2725     sums = y + dof*i;
2726     for (j=0; j<n; j++) {
2727       for (k=0; k<dof; k++) {
2728         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2729       }
2730       jrow++;
2731     }
2732   }
2733 
2734   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2735   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2736   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2741 {
2742   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2743   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2744   const PetscScalar *x,*v,*alpha;
2745   PetscScalar       *y;
2746   PetscErrorCode    ierr;
2747   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2748   PetscInt          n,i,k;
2749 
2750   PetscFunctionBegin;
2751   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2752   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2753   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2754   for (i=0; i<m; i++) {
2755     idx   = a->j + a->i[i];
2756     v     = a->a + a->i[i];
2757     n     = a->i[i+1] - a->i[i];
2758     alpha = x + dof*i;
2759     while (n-->0) {
2760       for (k=0; k<dof; k++) {
2761         y[dof*(*idx)+k] += alpha[k]*(*v);
2762       }
2763       idx++; v++;
2764     }
2765   }
2766   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2767   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2768   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2773 {
2774   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2775   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2776   const PetscScalar *x,*v,*alpha;
2777   PetscScalar       *y;
2778   PetscErrorCode    ierr;
2779   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2780   PetscInt          n,i,k;
2781 
2782   PetscFunctionBegin;
2783   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2784   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2785   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2786   for (i=0; i<m; i++) {
2787     idx   = a->j + a->i[i];
2788     v     = a->a + a->i[i];
2789     n     = a->i[i+1] - a->i[i];
2790     alpha = x + dof*i;
2791     while (n-->0) {
2792       for (k=0; k<dof; k++) {
2793         y[dof*(*idx)+k] += alpha[k]*(*v);
2794       }
2795       idx++; v++;
2796     }
2797   }
2798   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2799   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2800   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 /*===================================================================================*/
2805 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2806 {
2807   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   /* start the scatter */
2812   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2813   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2814   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2815   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2816   PetscFunctionReturn(0);
2817 }
2818 
2819 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2820 {
2821   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2822   PetscErrorCode ierr;
2823 
2824   PetscFunctionBegin;
2825   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2826   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2827   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2828   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2833 {
2834   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2835   PetscErrorCode ierr;
2836 
2837   PetscFunctionBegin;
2838   /* start the scatter */
2839   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2840   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2841   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2842   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2847 {
2848   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2849   PetscErrorCode ierr;
2850 
2851   PetscFunctionBegin;
2852   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2853   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2854   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2855   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 /* ----------------------------------------------------------------*/
2860 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2861 {
2862   Mat_Product    *product = C->product;
2863 
2864   PetscFunctionBegin;
2865   if (product->type == MATPRODUCT_PtAP) {
2866     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2867   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2872 {
2873   PetscErrorCode ierr;
2874   Mat_Product    *product = C->product;
2875   PetscBool      flg = PETSC_FALSE;
2876   Mat            A=product->A,P=product->B;
2877   PetscInt       alg=1; /* set default algorithm */
2878 #if !defined(PETSC_HAVE_HYPRE)
2879   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2880   PetscInt       nalg=4;
2881 #else
2882   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2883   PetscInt       nalg=5;
2884 #endif
2885 
2886   PetscFunctionBegin;
2887   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2888 
2889   /* PtAP */
2890   /* Check matrix local sizes */
2891   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2892   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2893 
2894   /* Set the default algorithm */
2895   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
2896   if (flg) {
2897     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2898   }
2899 
2900   /* Get runtime option */
2901   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
2902   ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2903   if (flg) {
2904     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2905   }
2906   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2907 
2908   ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr);
2909   if (flg) {
2910     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2911     PetscFunctionReturn(0);
2912   }
2913 
2914   ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
2915   if (flg) {
2916     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2917     PetscFunctionReturn(0);
2918   }
2919 
2920   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2921   ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr);
2922   ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr);
2923   ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 /* ----------------------------------------------------------------*/
2928 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2929 {
2930   PetscErrorCode     ierr;
2931   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2932   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2933   Mat                P         =pp->AIJ;
2934   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2935   PetscInt           *pti,*ptj,*ptJ;
2936   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2937   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2938   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2939   MatScalar          *ca;
2940   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2941 
2942   PetscFunctionBegin;
2943   /* Get ij structure of P^T */
2944   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2945 
2946   cn = pn*ppdof;
2947   /* Allocate ci array, arrays for fill computation and */
2948   /* free space for accumulating nonzero column info */
2949   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2950   ci[0] = 0;
2951 
2952   /* Work arrays for rows of P^T*A */
2953   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2954   ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr);
2955   ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr);
2956 
2957   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2958   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2959   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2960   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2961   current_space = free_space;
2962 
2963   /* Determine symbolic info for each row of C: */
2964   for (i=0; i<pn; i++) {
2965     ptnzi = pti[i+1] - pti[i];
2966     ptJ   = ptj + pti[i];
2967     for (dof=0; dof<ppdof; dof++) {
2968       ptanzi = 0;
2969       /* Determine symbolic row of PtA: */
2970       for (j=0; j<ptnzi; j++) {
2971         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2972         arow = ptJ[j]*ppdof + dof;
2973         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2974         anzj = ai[arow+1] - ai[arow];
2975         ajj  = aj + ai[arow];
2976         for (k=0; k<anzj; k++) {
2977           if (!ptadenserow[ajj[k]]) {
2978             ptadenserow[ajj[k]]    = -1;
2979             ptasparserow[ptanzi++] = ajj[k];
2980           }
2981         }
2982       }
2983       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2984       ptaj = ptasparserow;
2985       cnzi = 0;
2986       for (j=0; j<ptanzi; j++) {
2987         /* Get offset within block of P */
2988         pshift = *ptaj%ppdof;
2989         /* Get block row of P */
2990         prow = (*ptaj++)/ppdof; /* integer division */
2991         /* P has same number of nonzeros per row as the compressed form */
2992         pnzj = pi[prow+1] - pi[prow];
2993         pjj  = pj + pi[prow];
2994         for (k=0;k<pnzj;k++) {
2995           /* Locations in C are shifted by the offset within the block */
2996           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2997           if (!denserow[pjj[k]*ppdof+pshift]) {
2998             denserow[pjj[k]*ppdof+pshift] = -1;
2999             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3000           }
3001         }
3002       }
3003 
3004       /* sort sparserow */
3005       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3006 
3007       /* If free space is not available, make more free space */
3008       /* Double the amount of total space in the list */
3009       if (current_space->local_remaining<cnzi) {
3010         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
3011       }
3012 
3013       /* Copy data into free space, and zero out denserows */
3014       ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr);
3015 
3016       current_space->array           += cnzi;
3017       current_space->local_used      += cnzi;
3018       current_space->local_remaining -= cnzi;
3019 
3020       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3021       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3022 
3023       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3024       /*        For now, we will recompute what is needed. */
3025       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3026     }
3027   }
3028   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3029   /* Allocate space for cj, initialize cj, and */
3030   /* destroy list of free space and other temporary array(s) */
3031   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3032   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3033   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3034 
3035   /* Allocate space for ca */
3036   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
3037 
3038   /* put together the new matrix */
3039   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr);
3040   ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr);
3041 
3042   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3043   /* Since these are PETSc arrays, change flags to free them as necessary. */
3044   c          = (Mat_SeqAIJ*)(C->data);
3045   c->free_a  = PETSC_TRUE;
3046   c->free_ij = PETSC_TRUE;
3047   c->nonew   = 0;
3048 
3049   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3050   C->ops->productnumeric = MatProductNumeric_PtAP;
3051 
3052   /* Clean up. */
3053   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3058 {
3059   /* This routine requires testing -- first draft only */
3060   PetscErrorCode  ierr;
3061   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3062   Mat             P  =pp->AIJ;
3063   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3064   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3065   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3066   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3067   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3068   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3069   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3070   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3071   MatScalar       *ca=c->a,*caj,*apa;
3072 
3073   PetscFunctionBegin;
3074   /* Allocate temporary array for storage of one row of A*P */
3075   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3076 
3077   /* Clear old values in C */
3078   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
3079 
3080   for (i=0; i<am; i++) {
3081     /* Form sparse row of A*P */
3082     anzi  = ai[i+1] - ai[i];
3083     apnzj = 0;
3084     for (j=0; j<anzi; j++) {
3085       /* Get offset within block of P */
3086       pshift = *aj%ppdof;
3087       /* Get block row of P */
3088       prow = *aj++/ppdof;   /* integer division */
3089       pnzj = pi[prow+1] - pi[prow];
3090       pjj  = pj + pi[prow];
3091       paj  = pa + pi[prow];
3092       for (k=0; k<pnzj; k++) {
3093         poffset = pjj[k]*ppdof+pshift;
3094         if (!apjdense[poffset]) {
3095           apjdense[poffset] = -1;
3096           apj[apnzj++]      = poffset;
3097         }
3098         apa[poffset] += (*aa)*paj[k];
3099       }
3100       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3101       aa++;
3102     }
3103 
3104     /* Sort the j index array for quick sparse axpy. */
3105     /* Note: a array does not need sorting as it is in dense storage locations. */
3106     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3107 
3108     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3109     prow    = i/ppdof; /* integer division */
3110     pshift  = i%ppdof;
3111     poffset = pi[prow];
3112     pnzi    = pi[prow+1] - poffset;
3113     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3114     pJ = pj+poffset;
3115     pA = pa+poffset;
3116     for (j=0; j<pnzi; j++) {
3117       crow = (*pJ)*ppdof+pshift;
3118       cjj  = cj + ci[crow];
3119       caj  = ca + ci[crow];
3120       pJ++;
3121       /* Perform sparse axpy operation.  Note cjj includes apj. */
3122       for (k=0,nextap=0; nextap<apnzj; k++) {
3123         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3124       }
3125       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3126       pA++;
3127     }
3128 
3129     /* Zero the current row info for A*P */
3130     for (j=0; j<apnzj; j++) {
3131       apa[apj[j]]      = 0.;
3132       apjdense[apj[j]] = 0;
3133     }
3134   }
3135 
3136   /* Assemble the final matrix and clean up */
3137   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3138   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3139   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3140   PetscFunctionReturn(0);
3141 }
3142 
3143 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3144 {
3145   PetscErrorCode      ierr;
3146   Mat_Product         *product = C->product;
3147   Mat                 A=product->A,P=product->B;
3148 
3149   PetscFunctionBegin;
3150   ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr);
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3155 {
3156   PetscFunctionBegin;
3157   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3158 }
3159 
3160 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3161 {
3162   PetscFunctionBegin;
3163   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3164 }
3165 
3166 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3167 
3168 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3169 {
3170   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3171   PetscErrorCode  ierr;
3172 
3173   PetscFunctionBegin;
3174 
3175   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3180 
3181 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3182 {
3183   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3184   PetscErrorCode  ierr;
3185 
3186   PetscFunctionBegin;
3187   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3188   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3189   PetscFunctionReturn(0);
3190 }
3191 
3192 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3193 
3194 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3195 {
3196   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3197   PetscErrorCode  ierr;
3198 
3199   PetscFunctionBegin;
3200 
3201   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3206 
3207 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3208 {
3209   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3210   PetscErrorCode  ierr;
3211 
3212   PetscFunctionBegin;
3213 
3214   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
3215   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3220 {
3221   PetscErrorCode      ierr;
3222   Mat_Product         *product = C->product;
3223   Mat                 A=product->A,P=product->B;
3224   PetscBool           flg;
3225 
3226   PetscFunctionBegin;
3227   ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr);
3228   if (flg) {
3229     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr);
3230     C->ops->productnumeric = MatProductNumeric_PtAP;
3231     PetscFunctionReturn(0);
3232   }
3233 
3234   ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
3235   if (flg) {
3236     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr);
3237     C->ops->productnumeric = MatProductNumeric_PtAP;
3238     PetscFunctionReturn(0);
3239   }
3240 
3241   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3242 }
3243 
3244 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3245 {
3246   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3247   Mat            a    = b->AIJ,B;
3248   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3249   PetscErrorCode ierr;
3250   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3251   PetscInt       *cols;
3252   PetscScalar    *vals;
3253 
3254   PetscFunctionBegin;
3255   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3256   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3257   for (i=0; i<m; i++) {
3258     nmax = PetscMax(nmax,aij->ilen[i]);
3259     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3260   }
3261   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3262   ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr);
3263   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3264   ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr);
3265   ierr = PetscFree(ilen);CHKERRQ(ierr);
3266   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3267   ii   = 0;
3268   for (i=0; i<m; i++) {
3269     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3270     for (j=0; j<dof; j++) {
3271       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3272       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3273       ii++;
3274     }
3275     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3276   }
3277   ierr = PetscFree(icols);CHKERRQ(ierr);
3278   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3280 
3281   if (reuse == MAT_INPLACE_MATRIX) {
3282     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3283   } else {
3284     *newmat = B;
3285   }
3286   PetscFunctionReturn(0);
3287 }
3288 
3289 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3290 
3291 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3292 {
3293   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3294   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3295   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3296   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3297   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3298   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3299   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3300   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3301   PetscInt       rstart,cstart,*garray,ii,k;
3302   PetscErrorCode ierr;
3303   PetscScalar    *vals,*ovals;
3304 
3305   PetscFunctionBegin;
3306   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3307   for (i=0; i<A->rmap->n/dof; i++) {
3308     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3309     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3310     for (j=0; j<dof; j++) {
3311       dnz[dof*i+j] = AIJ->ilen[i];
3312       onz[dof*i+j] = OAIJ->ilen[i];
3313     }
3314   }
3315   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3316   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3317   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3318   ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
3319   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3320   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3321 
3322   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3323   rstart = dof*maij->A->rmap->rstart;
3324   cstart = dof*maij->A->cmap->rstart;
3325   garray = mpiaij->garray;
3326 
3327   ii = rstart;
3328   for (i=0; i<A->rmap->n/dof; i++) {
3329     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3330     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3331     for (j=0; j<dof; j++) {
3332       for (k=0; k<ncols; k++) {
3333         icols[k] = cstart + dof*cols[k]+j;
3334       }
3335       for (k=0; k<oncols; k++) {
3336         oicols[k] = dof*garray[ocols[k]]+j;
3337       }
3338       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3339       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3340       ii++;
3341     }
3342     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3343     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3344   }
3345   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3346 
3347   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3349 
3350   if (reuse == MAT_INPLACE_MATRIX) {
3351     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3352     ((PetscObject)A)->refct = 1;
3353 
3354     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3355 
3356     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3357   } else {
3358     *newmat = B;
3359   }
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3364 {
3365   PetscErrorCode ierr;
3366   Mat            A;
3367 
3368   PetscFunctionBegin;
3369   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3370   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3371   ierr = MatDestroy(&A);CHKERRQ(ierr);
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3376 {
3377   PetscErrorCode ierr;
3378   Mat            A;
3379 
3380   PetscFunctionBegin;
3381   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3382   ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr);
3383   ierr = MatDestroy(&A);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 /* ---------------------------------------------------------------------------------- */
3388 /*@
3389   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3390   operations for multicomponent problems.  It interpolates each component the same
3391   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3392   and MATMPIAIJ for distributed matrices.
3393 
3394   Collective
3395 
3396   Input Parameters:
3397 + A - the AIJ matrix describing the action on blocks
3398 - dof - the block size (number of components per node)
3399 
3400   Output Parameter:
3401 . maij - the new MAIJ matrix
3402 
3403   Operations provided:
3404 + MatMult
3405 . MatMultTranspose
3406 . MatMultAdd
3407 . MatMultTransposeAdd
3408 - MatView
3409 
3410   Level: advanced
3411 
3412 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3413 @*/
3414 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3415 {
3416   PetscErrorCode ierr;
3417   PetscMPIInt    size;
3418   PetscInt       n;
3419   Mat            B;
3420 #if defined(PETSC_HAVE_CUDA)
3421   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3422   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3423 #endif
3424 
3425   PetscFunctionBegin;
3426   dof  = PetscAbs(dof);
3427   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3428 
3429   if (dof == 1) *maij = A;
3430   else {
3431     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3432     /* propagate vec type */
3433     ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr);
3434     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3435     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3436     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3437     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3438     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3439 
3440     B->assembled = PETSC_TRUE;
3441 
3442     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3443     if (size == 1) {
3444       Mat_SeqMAIJ *b;
3445 
3446       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3447 
3448       B->ops->setup   = NULL;
3449       B->ops->destroy = MatDestroy_SeqMAIJ;
3450       B->ops->view    = MatView_SeqMAIJ;
3451 
3452       b               = (Mat_SeqMAIJ*)B->data;
3453       b->dof          = dof;
3454       b->AIJ          = A;
3455 
3456       if (dof == 2) {
3457         B->ops->mult             = MatMult_SeqMAIJ_2;
3458         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3459         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3460         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3461       } else if (dof == 3) {
3462         B->ops->mult             = MatMult_SeqMAIJ_3;
3463         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3464         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3465         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3466       } else if (dof == 4) {
3467         B->ops->mult             = MatMult_SeqMAIJ_4;
3468         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3469         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3470         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3471       } else if (dof == 5) {
3472         B->ops->mult             = MatMult_SeqMAIJ_5;
3473         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3474         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3475         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3476       } else if (dof == 6) {
3477         B->ops->mult             = MatMult_SeqMAIJ_6;
3478         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3479         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3480         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3481       } else if (dof == 7) {
3482         B->ops->mult             = MatMult_SeqMAIJ_7;
3483         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3484         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3485         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3486       } else if (dof == 8) {
3487         B->ops->mult             = MatMult_SeqMAIJ_8;
3488         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3489         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3490         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3491       } else if (dof == 9) {
3492         B->ops->mult             = MatMult_SeqMAIJ_9;
3493         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3494         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3495         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3496       } else if (dof == 10) {
3497         B->ops->mult             = MatMult_SeqMAIJ_10;
3498         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3499         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3500         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3501       } else if (dof == 11) {
3502         B->ops->mult             = MatMult_SeqMAIJ_11;
3503         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3504         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3505         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3506       } else if (dof == 16) {
3507         B->ops->mult             = MatMult_SeqMAIJ_16;
3508         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3509         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3510         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3511       } else if (dof == 18) {
3512         B->ops->mult             = MatMult_SeqMAIJ_18;
3513         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3514         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3515         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3516       } else {
3517         B->ops->mult             = MatMult_SeqMAIJ_N;
3518         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3519         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3520         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3521       }
3522 #if defined(PETSC_HAVE_CUDA)
3523       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3524 #endif
3525       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3526       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3527     } else {
3528       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3529       Mat_MPIMAIJ *b;
3530       IS          from,to;
3531       Vec         gvec;
3532 
3533       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3534 
3535       B->ops->setup            = NULL;
3536       B->ops->destroy          = MatDestroy_MPIMAIJ;
3537       B->ops->view             = MatView_MPIMAIJ;
3538 
3539       b      = (Mat_MPIMAIJ*)B->data;
3540       b->dof = dof;
3541       b->A   = A;
3542 
3543       ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr);
3544       ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr);
3545 
3546       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3547       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3548       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3549       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3550       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3551 
3552       /* create two temporary Index sets for build scatter gather */
3553       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3554       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3555 
3556       /* create temporary global vector to generate scatter context */
3557       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3558 
3559       /* generate the scatter context */
3560       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3561 
3562       ierr = ISDestroy(&from);CHKERRQ(ierr);
3563       ierr = ISDestroy(&to);CHKERRQ(ierr);
3564       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3565 
3566       B->ops->mult             = MatMult_MPIMAIJ_dof;
3567       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3568       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3569       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3570 
3571 #if defined(PETSC_HAVE_CUDA)
3572       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3573 #endif
3574       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3575       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3576     }
3577     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3578     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3579     ierr = MatSetUp(B);CHKERRQ(ierr);
3580 #if defined(PETSC_HAVE_CUDA)
3581     /* temporary until we have CUDA implementation of MAIJ */
3582     {
3583       PetscBool flg;
3584       if (convert) {
3585         ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
3586         if (flg) {
3587           ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
3588         }
3589       }
3590     }
3591 #endif
3592     *maij = B;
3593   }
3594   PetscFunctionReturn(0);
3595 }
3596