xref: /petsc/src/mat/impls/maij/maij.c (revision 3e7ff0edd3573be01c8c0fa32db97c3db8fa5c8d)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 #include <petsc-private/vecimpl.h>
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatMAIJGetAIJ"
25 /*@
26    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27 
28    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29 
30    Input Parameter:
31 .  A - the MAIJ matrix
32 
33    Output Parameter:
34 .  B - the AIJ matrix
35 
36    Level: advanced
37 
38    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39 
40 .seealso: MatCreateMAIJ()
41 @*/
42 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43 {
44   PetscErrorCode ierr;
45   PetscBool      ismpimaij,isseqmaij;
46 
47   PetscFunctionBegin;
48   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50   if (ismpimaij) {
51     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52 
53     *B = b->A;
54   } else if (isseqmaij) {
55     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56 
57     *B = b->AIJ;
58   } else {
59     *B = A;
60   }
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatMAIJRedimension"
66 /*@C
67    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68 
69    Logically Collective
70 
71    Input Parameter:
72 +  A - the MAIJ matrix
73 -  dof - the block size for the new matrix
74 
75    Output Parameter:
76 .  B - the new MAIJ matrix
77 
78    Level: advanced
79 
80 .seealso: MatCreateMAIJ()
81 @*/
82 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83 {
84   PetscErrorCode ierr;
85   Mat            Aij = NULL;
86 
87   PetscFunctionBegin;
88   PetscValidLogicalCollectiveInt(A,dof,2);
89   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatDestroy_SeqMAIJ"
96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97 {
98   PetscErrorCode ierr;
99   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
100 
101   PetscFunctionBegin;
102   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103   ierr = PetscFree(A->data);CHKERRQ(ierr);
104   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
105   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "MatSetUp_MAIJ"
111 PetscErrorCode MatSetUp_MAIJ(Mat A)
112 {
113   PetscFunctionBegin;
114   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatView_SeqMAIJ"
120 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
121 {
122   PetscErrorCode ierr;
123   Mat            B;
124 
125   PetscFunctionBegin;
126   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
127   ierr = MatView(B,viewer);CHKERRQ(ierr);
128   ierr = MatDestroy(&B);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatView_MPIMAIJ"
134 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
135 {
136   PetscErrorCode ierr;
137   Mat            B;
138 
139   PetscFunctionBegin;
140   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
141   ierr = MatView(B,viewer);CHKERRQ(ierr);
142   ierr = MatDestroy(&B);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 #undef __FUNCT__
147 #define __FUNCT__ "MatDestroy_MPIMAIJ"
148 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
149 {
150   PetscErrorCode ierr;
151   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
152 
153   PetscFunctionBegin;
154   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
155   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
156   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
157   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
158   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
159   ierr = PetscFree(A->data);CHKERRQ(ierr);
160   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
161   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
162   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 /*MC
167   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168   multicomponent problems, interpolating or restricting each component the same way independently.
169   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
170 
171   Operations provided:
172 . MatMult
173 . MatMultTranspose
174 . MatMultAdd
175 . MatMultTransposeAdd
176 
177   Level: advanced
178 
179 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180 M*/
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatCreate_MAIJ"
184 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
185 {
186   PetscErrorCode ierr;
187   Mat_MPIMAIJ    *b;
188   PetscMPIInt    size;
189 
190   PetscFunctionBegin;
191   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
192   A->data  = (void*)b;
193 
194   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
195 
196   A->ops->setup = MatSetUp_MAIJ;
197 
198   b->AIJ  = 0;
199   b->dof  = 0;
200   b->OAIJ = 0;
201   b->ctx  = 0;
202   b->w    = 0;
203   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
204   if (size == 1) {
205     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
206   } else {
207     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 /* --------------------------------------------------------------------------------------*/
213 #undef __FUNCT__
214 #define __FUNCT__ "MatMult_SeqMAIJ_2"
215 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
216 {
217   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
218   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
219   const PetscScalar *x,*v;
220   PetscScalar       *y, sum1, sum2;
221   PetscErrorCode    ierr;
222   PetscInt          nonzerorow=0,n,i,jrow,j;
223   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
224 
225   PetscFunctionBegin;
226   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
227   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
228   idx  = a->j;
229   v    = a->a;
230   ii   = a->i;
231 
232   for (i=0; i<m; i++) {
233     jrow  = ii[i];
234     n     = ii[i+1] - jrow;
235     sum1  = 0.0;
236     sum2  = 0.0;
237 
238     nonzerorow += (n>0);
239     for (j=0; j<n; j++) {
240       sum1 += v[jrow]*x[2*idx[jrow]];
241       sum2 += v[jrow]*x[2*idx[jrow]+1];
242       jrow++;
243     }
244     y[2*i]   = sum1;
245     y[2*i+1] = sum2;
246   }
247 
248   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
249   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
250   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
256 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
257 {
258   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
259   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
260   const PetscScalar *x,*v;
261   PetscScalar       *y,alpha1,alpha2;
262   PetscErrorCode    ierr;
263   PetscInt          n,i;
264   const PetscInt    m = b->AIJ->rmap->n,*idx;
265 
266   PetscFunctionBegin;
267   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
268   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
269   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
270 
271   for (i=0; i<m; i++) {
272     idx    = a->j + a->i[i];
273     v      = a->a + a->i[i];
274     n      = a->i[i+1] - a->i[i];
275     alpha1 = x[2*i];
276     alpha2 = x[2*i+1];
277     while (n-->0) {
278       y[2*(*idx)]   += alpha1*(*v);
279       y[2*(*idx)+1] += alpha2*(*v);
280       idx++; v++;
281     }
282   }
283   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
284   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
285   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 #undef __FUNCT__
290 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
291 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
292 {
293   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
294   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
295   const PetscScalar *x,*v;
296   PetscScalar       *y,sum1, sum2;
297   PetscErrorCode    ierr;
298   PetscInt          n,i,jrow,j;
299   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
300 
301   PetscFunctionBegin;
302   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
303   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
305   idx  = a->j;
306   v    = a->a;
307   ii   = a->i;
308 
309   for (i=0; i<m; i++) {
310     jrow = ii[i];
311     n    = ii[i+1] - jrow;
312     sum1 = 0.0;
313     sum2 = 0.0;
314     for (j=0; j<n; j++) {
315       sum1 += v[jrow]*x[2*idx[jrow]];
316       sum2 += v[jrow]*x[2*idx[jrow]+1];
317       jrow++;
318     }
319     y[2*i]   += sum1;
320     y[2*i+1] += sum2;
321   }
322 
323   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
324   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
325   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 #undef __FUNCT__
329 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
330 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
331 {
332   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
333   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
334   const PetscScalar *x,*v;
335   PetscScalar       *y,alpha1,alpha2;
336   PetscErrorCode    ierr;
337   PetscInt          n,i;
338   const PetscInt    m = b->AIJ->rmap->n,*idx;
339 
340   PetscFunctionBegin;
341   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
342   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
343   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
344 
345   for (i=0; i<m; i++) {
346     idx    = a->j + a->i[i];
347     v      = a->a + a->i[i];
348     n      = a->i[i+1] - a->i[i];
349     alpha1 = x[2*i];
350     alpha2 = x[2*i+1];
351     while (n-->0) {
352       y[2*(*idx)]   += alpha1*(*v);
353       y[2*(*idx)+1] += alpha2*(*v);
354       idx++; v++;
355     }
356   }
357   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
359   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 /* --------------------------------------------------------------------------------------*/
363 #undef __FUNCT__
364 #define __FUNCT__ "MatMult_SeqMAIJ_3"
365 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
366 {
367   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
368   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
369   const PetscScalar *x,*v;
370   PetscScalar       *y,sum1, sum2, sum3;
371   PetscErrorCode    ierr;
372   PetscInt          nonzerorow=0,n,i,jrow,j;
373   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
374 
375   PetscFunctionBegin;
376   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
377   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
378   idx  = a->j;
379   v    = a->a;
380   ii   = a->i;
381 
382   for (i=0; i<m; i++) {
383     jrow  = ii[i];
384     n     = ii[i+1] - jrow;
385     sum1  = 0.0;
386     sum2  = 0.0;
387     sum3  = 0.0;
388 
389     nonzerorow += (n>0);
390     for (j=0; j<n; j++) {
391       sum1 += v[jrow]*x[3*idx[jrow]];
392       sum2 += v[jrow]*x[3*idx[jrow]+1];
393       sum3 += v[jrow]*x[3*idx[jrow]+2];
394       jrow++;
395      }
396     y[3*i]   = sum1;
397     y[3*i+1] = sum2;
398     y[3*i+2] = sum3;
399   }
400 
401   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
402   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
403   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
409 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
410 {
411   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
412   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
413   const PetscScalar *x,*v;
414   PetscScalar       *y,alpha1,alpha2,alpha3;
415   PetscErrorCode    ierr;
416   PetscInt          n,i;
417   const PetscInt    m = b->AIJ->rmap->n,*idx;
418 
419   PetscFunctionBegin;
420   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
421   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
422   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
423 
424   for (i=0; i<m; i++) {
425     idx    = a->j + a->i[i];
426     v      = a->a + a->i[i];
427     n      = a->i[i+1] - a->i[i];
428     alpha1 = x[3*i];
429     alpha2 = x[3*i+1];
430     alpha3 = x[3*i+2];
431     while (n-->0) {
432       y[3*(*idx)]   += alpha1*(*v);
433       y[3*(*idx)+1] += alpha2*(*v);
434       y[3*(*idx)+2] += alpha3*(*v);
435       idx++; v++;
436     }
437   }
438   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
439   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
446 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
447 {
448   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
449   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
450   const PetscScalar *x,*v;
451   PetscScalar       *y,sum1, sum2, sum3;
452   PetscErrorCode    ierr;
453   PetscInt          n,i,jrow,j;
454   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
455 
456   PetscFunctionBegin;
457   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
458   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
460   idx  = a->j;
461   v    = a->a;
462   ii   = a->i;
463 
464   for (i=0; i<m; i++) {
465     jrow = ii[i];
466     n    = ii[i+1] - jrow;
467     sum1 = 0.0;
468     sum2 = 0.0;
469     sum3 = 0.0;
470     for (j=0; j<n; j++) {
471       sum1 += v[jrow]*x[3*idx[jrow]];
472       sum2 += v[jrow]*x[3*idx[jrow]+1];
473       sum3 += v[jrow]*x[3*idx[jrow]+2];
474       jrow++;
475     }
476     y[3*i]   += sum1;
477     y[3*i+1] += sum2;
478     y[3*i+2] += sum3;
479   }
480 
481   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
482   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
483   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 #undef __FUNCT__
487 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
488 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
489 {
490   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
491   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
492   const PetscScalar *x,*v;
493   PetscScalar       *y,alpha1,alpha2,alpha3;
494   PetscErrorCode    ierr;
495   PetscInt          n,i;
496   const PetscInt    m = b->AIJ->rmap->n,*idx;
497 
498   PetscFunctionBegin;
499   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
500   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
501   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
502   for (i=0; i<m; i++) {
503     idx    = a->j + a->i[i];
504     v      = a->a + a->i[i];
505     n      = a->i[i+1] - a->i[i];
506     alpha1 = x[3*i];
507     alpha2 = x[3*i+1];
508     alpha3 = x[3*i+2];
509     while (n-->0) {
510       y[3*(*idx)]   += alpha1*(*v);
511       y[3*(*idx)+1] += alpha2*(*v);
512       y[3*(*idx)+2] += alpha3*(*v);
513       idx++; v++;
514     }
515   }
516   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
517   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
518   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 /* ------------------------------------------------------------------------------*/
523 #undef __FUNCT__
524 #define __FUNCT__ "MatMult_SeqMAIJ_4"
525 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
526 {
527   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
528   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
529   const PetscScalar *x,*v;
530   PetscScalar       *y,sum1, sum2, sum3, sum4;
531   PetscErrorCode    ierr;
532   PetscInt          nonzerorow=0,n,i,jrow,j;
533   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
534 
535   PetscFunctionBegin;
536   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
537   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
538   idx  = a->j;
539   v    = a->a;
540   ii   = a->i;
541 
542   for (i=0; i<m; i++) {
543     jrow        = ii[i];
544     n           = ii[i+1] - jrow;
545     sum1        = 0.0;
546     sum2        = 0.0;
547     sum3        = 0.0;
548     sum4        = 0.0;
549     nonzerorow += (n>0);
550     for (j=0; j<n; j++) {
551       sum1 += v[jrow]*x[4*idx[jrow]];
552       sum2 += v[jrow]*x[4*idx[jrow]+1];
553       sum3 += v[jrow]*x[4*idx[jrow]+2];
554       sum4 += v[jrow]*x[4*idx[jrow]+3];
555       jrow++;
556     }
557     y[4*i]   = sum1;
558     y[4*i+1] = sum2;
559     y[4*i+2] = sum3;
560     y[4*i+3] = sum4;
561   }
562 
563   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
564   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
565   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
571 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
572 {
573   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
574   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
575   const PetscScalar *x,*v;
576   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
577   PetscErrorCode    ierr;
578   PetscInt          n,i;
579   const PetscInt    m = b->AIJ->rmap->n,*idx;
580 
581   PetscFunctionBegin;
582   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
583   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
584   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
585   for (i=0; i<m; i++) {
586     idx    = a->j + a->i[i];
587     v      = a->a + a->i[i];
588     n      = a->i[i+1] - a->i[i];
589     alpha1 = x[4*i];
590     alpha2 = x[4*i+1];
591     alpha3 = x[4*i+2];
592     alpha4 = x[4*i+3];
593     while (n-->0) {
594       y[4*(*idx)]   += alpha1*(*v);
595       y[4*(*idx)+1] += alpha2*(*v);
596       y[4*(*idx)+2] += alpha3*(*v);
597       y[4*(*idx)+3] += alpha4*(*v);
598       idx++; v++;
599     }
600   }
601   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
602   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
603   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
609 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
610 {
611   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
612   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
613   const PetscScalar *x,*v;
614   PetscScalar       *y,sum1, sum2, sum3, sum4;
615   PetscErrorCode    ierr;
616   PetscInt          n,i,jrow,j;
617   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
618 
619   PetscFunctionBegin;
620   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
621   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
622   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
623   idx  = a->j;
624   v    = a->a;
625   ii   = a->i;
626 
627   for (i=0; i<m; i++) {
628     jrow = ii[i];
629     n    = ii[i+1] - jrow;
630     sum1 = 0.0;
631     sum2 = 0.0;
632     sum3 = 0.0;
633     sum4 = 0.0;
634     for (j=0; j<n; j++) {
635       sum1 += v[jrow]*x[4*idx[jrow]];
636       sum2 += v[jrow]*x[4*idx[jrow]+1];
637       sum3 += v[jrow]*x[4*idx[jrow]+2];
638       sum4 += v[jrow]*x[4*idx[jrow]+3];
639       jrow++;
640     }
641     y[4*i]   += sum1;
642     y[4*i+1] += sum2;
643     y[4*i+2] += sum3;
644     y[4*i+3] += sum4;
645   }
646 
647   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
648   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
649   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
654 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658   const PetscScalar *x,*v;
659   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
660   PetscErrorCode    ierr;
661   PetscInt          n,i;
662   const PetscInt    m = b->AIJ->rmap->n,*idx;
663 
664   PetscFunctionBegin;
665   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
666   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
667   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
668 
669   for (i=0; i<m; i++) {
670     idx    = a->j + a->i[i];
671     v      = a->a + a->i[i];
672     n      = a->i[i+1] - a->i[i];
673     alpha1 = x[4*i];
674     alpha2 = x[4*i+1];
675     alpha3 = x[4*i+2];
676     alpha4 = x[4*i+3];
677     while (n-->0) {
678       y[4*(*idx)]   += alpha1*(*v);
679       y[4*(*idx)+1] += alpha2*(*v);
680       y[4*(*idx)+2] += alpha3*(*v);
681       y[4*(*idx)+3] += alpha4*(*v);
682       idx++; v++;
683     }
684   }
685   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
686   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
687   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 /* ------------------------------------------------------------------------------*/
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatMult_SeqMAIJ_5"
694 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
695 {
696   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
697   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
698   const PetscScalar *x,*v;
699   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
700   PetscErrorCode    ierr;
701   PetscInt          nonzerorow=0,n,i,jrow,j;
702   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
703 
704   PetscFunctionBegin;
705   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
706   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
707   idx  = a->j;
708   v    = a->a;
709   ii   = a->i;
710 
711   for (i=0; i<m; i++) {
712     jrow  = ii[i];
713     n     = ii[i+1] - jrow;
714     sum1  = 0.0;
715     sum2  = 0.0;
716     sum3  = 0.0;
717     sum4  = 0.0;
718     sum5  = 0.0;
719 
720     nonzerorow += (n>0);
721     for (j=0; j<n; j++) {
722       sum1 += v[jrow]*x[5*idx[jrow]];
723       sum2 += v[jrow]*x[5*idx[jrow]+1];
724       sum3 += v[jrow]*x[5*idx[jrow]+2];
725       sum4 += v[jrow]*x[5*idx[jrow]+3];
726       sum5 += v[jrow]*x[5*idx[jrow]+4];
727       jrow++;
728     }
729     y[5*i]   = sum1;
730     y[5*i+1] = sum2;
731     y[5*i+2] = sum3;
732     y[5*i+3] = sum4;
733     y[5*i+4] = sum5;
734   }
735 
736   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
737   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
738   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
744 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
745 {
746   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
747   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
748   const PetscScalar *x,*v;
749   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
750   PetscErrorCode    ierr;
751   PetscInt          n,i;
752   const PetscInt    m = b->AIJ->rmap->n,*idx;
753 
754   PetscFunctionBegin;
755   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
756   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
757   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
758 
759   for (i=0; i<m; i++) {
760     idx    = a->j + a->i[i];
761     v      = a->a + a->i[i];
762     n      = a->i[i+1] - a->i[i];
763     alpha1 = x[5*i];
764     alpha2 = x[5*i+1];
765     alpha3 = x[5*i+2];
766     alpha4 = x[5*i+3];
767     alpha5 = x[5*i+4];
768     while (n-->0) {
769       y[5*(*idx)]   += alpha1*(*v);
770       y[5*(*idx)+1] += alpha2*(*v);
771       y[5*(*idx)+2] += alpha3*(*v);
772       y[5*(*idx)+3] += alpha4*(*v);
773       y[5*(*idx)+4] += alpha5*(*v);
774       idx++; v++;
775     }
776   }
777   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
778   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
779   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
785 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
786 {
787   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
788   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
789   const PetscScalar *x,*v;
790   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
791   PetscErrorCode    ierr;
792   PetscInt          n,i,jrow,j;
793   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
794 
795   PetscFunctionBegin;
796   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
797   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
798   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
799   idx  = a->j;
800   v    = a->a;
801   ii   = a->i;
802 
803   for (i=0; i<m; i++) {
804     jrow = ii[i];
805     n    = ii[i+1] - jrow;
806     sum1 = 0.0;
807     sum2 = 0.0;
808     sum3 = 0.0;
809     sum4 = 0.0;
810     sum5 = 0.0;
811     for (j=0; j<n; j++) {
812       sum1 += v[jrow]*x[5*idx[jrow]];
813       sum2 += v[jrow]*x[5*idx[jrow]+1];
814       sum3 += v[jrow]*x[5*idx[jrow]+2];
815       sum4 += v[jrow]*x[5*idx[jrow]+3];
816       sum5 += v[jrow]*x[5*idx[jrow]+4];
817       jrow++;
818     }
819     y[5*i]   += sum1;
820     y[5*i+1] += sum2;
821     y[5*i+2] += sum3;
822     y[5*i+3] += sum4;
823     y[5*i+4] += sum5;
824   }
825 
826   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
827   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
828   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
834 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
835 {
836   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
837   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
838   const PetscScalar *x,*v;
839   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
840   PetscErrorCode    ierr;
841   PetscInt          n,i;
842   const PetscInt    m = b->AIJ->rmap->n,*idx;
843 
844   PetscFunctionBegin;
845   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
846   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
847   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
848 
849   for (i=0; i<m; i++) {
850     idx    = a->j + a->i[i];
851     v      = a->a + a->i[i];
852     n      = a->i[i+1] - a->i[i];
853     alpha1 = x[5*i];
854     alpha2 = x[5*i+1];
855     alpha3 = x[5*i+2];
856     alpha4 = x[5*i+3];
857     alpha5 = x[5*i+4];
858     while (n-->0) {
859       y[5*(*idx)]   += alpha1*(*v);
860       y[5*(*idx)+1] += alpha2*(*v);
861       y[5*(*idx)+2] += alpha3*(*v);
862       y[5*(*idx)+3] += alpha4*(*v);
863       y[5*(*idx)+4] += alpha5*(*v);
864       idx++; v++;
865     }
866   }
867   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
868   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
869   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 /* ------------------------------------------------------------------------------*/
874 #undef __FUNCT__
875 #define __FUNCT__ "MatMult_SeqMAIJ_6"
876 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
877 {
878   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
879   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
880   const PetscScalar *x,*v;
881   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
882   PetscErrorCode    ierr;
883   PetscInt          nonzerorow=0,n,i,jrow,j;
884   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
885 
886   PetscFunctionBegin;
887   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
888   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
889   idx  = a->j;
890   v    = a->a;
891   ii   = a->i;
892 
893   for (i=0; i<m; i++) {
894     jrow  = ii[i];
895     n     = ii[i+1] - jrow;
896     sum1  = 0.0;
897     sum2  = 0.0;
898     sum3  = 0.0;
899     sum4  = 0.0;
900     sum5  = 0.0;
901     sum6  = 0.0;
902 
903     nonzerorow += (n>0);
904     for (j=0; j<n; j++) {
905       sum1 += v[jrow]*x[6*idx[jrow]];
906       sum2 += v[jrow]*x[6*idx[jrow]+1];
907       sum3 += v[jrow]*x[6*idx[jrow]+2];
908       sum4 += v[jrow]*x[6*idx[jrow]+3];
909       sum5 += v[jrow]*x[6*idx[jrow]+4];
910       sum6 += v[jrow]*x[6*idx[jrow]+5];
911       jrow++;
912     }
913     y[6*i]   = sum1;
914     y[6*i+1] = sum2;
915     y[6*i+2] = sum3;
916     y[6*i+3] = sum4;
917     y[6*i+4] = sum5;
918     y[6*i+5] = sum6;
919   }
920 
921   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
922   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
923   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
929 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
930 {
931   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
932   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
933   const PetscScalar *x,*v;
934   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
935   PetscErrorCode    ierr;
936   PetscInt          n,i;
937   const PetscInt    m = b->AIJ->rmap->n,*idx;
938 
939   PetscFunctionBegin;
940   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
941   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
942   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
943 
944   for (i=0; i<m; i++) {
945     idx    = a->j + a->i[i];
946     v      = a->a + a->i[i];
947     n      = a->i[i+1] - a->i[i];
948     alpha1 = x[6*i];
949     alpha2 = x[6*i+1];
950     alpha3 = x[6*i+2];
951     alpha4 = x[6*i+3];
952     alpha5 = x[6*i+4];
953     alpha6 = x[6*i+5];
954     while (n-->0) {
955       y[6*(*idx)]   += alpha1*(*v);
956       y[6*(*idx)+1] += alpha2*(*v);
957       y[6*(*idx)+2] += alpha3*(*v);
958       y[6*(*idx)+3] += alpha4*(*v);
959       y[6*(*idx)+4] += alpha5*(*v);
960       y[6*(*idx)+5] += alpha6*(*v);
961       idx++; v++;
962     }
963   }
964   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
966   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 #undef __FUNCT__
971 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
972 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
973 {
974   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
975   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
976   const PetscScalar *x,*v;
977   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
978   PetscErrorCode    ierr;
979   PetscInt          n,i,jrow,j;
980   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
981 
982   PetscFunctionBegin;
983   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
984   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
985   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
986   idx  = a->j;
987   v    = a->a;
988   ii   = a->i;
989 
990   for (i=0; i<m; i++) {
991     jrow = ii[i];
992     n    = ii[i+1] - jrow;
993     sum1 = 0.0;
994     sum2 = 0.0;
995     sum3 = 0.0;
996     sum4 = 0.0;
997     sum5 = 0.0;
998     sum6 = 0.0;
999     for (j=0; j<n; j++) {
1000       sum1 += v[jrow]*x[6*idx[jrow]];
1001       sum2 += v[jrow]*x[6*idx[jrow]+1];
1002       sum3 += v[jrow]*x[6*idx[jrow]+2];
1003       sum4 += v[jrow]*x[6*idx[jrow]+3];
1004       sum5 += v[jrow]*x[6*idx[jrow]+4];
1005       sum6 += v[jrow]*x[6*idx[jrow]+5];
1006       jrow++;
1007     }
1008     y[6*i]   += sum1;
1009     y[6*i+1] += sum2;
1010     y[6*i+2] += sum3;
1011     y[6*i+3] += sum4;
1012     y[6*i+4] += sum5;
1013     y[6*i+5] += sum6;
1014   }
1015 
1016   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1017   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1018   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1024 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1025 {
1026   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1027   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1028   const PetscScalar *x,*v;
1029   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1030   PetscErrorCode    ierr;
1031   PetscInt          n,i;
1032   const PetscInt    m = b->AIJ->rmap->n,*idx;
1033 
1034   PetscFunctionBegin;
1035   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1036   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1037   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1038 
1039   for (i=0; i<m; i++) {
1040     idx    = a->j + a->i[i];
1041     v      = a->a + a->i[i];
1042     n      = a->i[i+1] - a->i[i];
1043     alpha1 = x[6*i];
1044     alpha2 = x[6*i+1];
1045     alpha3 = x[6*i+2];
1046     alpha4 = x[6*i+3];
1047     alpha5 = x[6*i+4];
1048     alpha6 = x[6*i+5];
1049     while (n-->0) {
1050       y[6*(*idx)]   += alpha1*(*v);
1051       y[6*(*idx)+1] += alpha2*(*v);
1052       y[6*(*idx)+2] += alpha3*(*v);
1053       y[6*(*idx)+3] += alpha4*(*v);
1054       y[6*(*idx)+4] += alpha5*(*v);
1055       y[6*(*idx)+5] += alpha6*(*v);
1056       idx++; v++;
1057     }
1058   }
1059   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1060   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1061   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 /* ------------------------------------------------------------------------------*/
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1068 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1069 {
1070   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1071   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1072   const PetscScalar *x,*v;
1073   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1074   PetscErrorCode    ierr;
1075   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1076   PetscInt          nonzerorow=0,n,i,jrow,j;
1077 
1078   PetscFunctionBegin;
1079   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1080   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1081   idx  = a->j;
1082   v    = a->a;
1083   ii   = a->i;
1084 
1085   for (i=0; i<m; i++) {
1086     jrow  = ii[i];
1087     n     = ii[i+1] - jrow;
1088     sum1  = 0.0;
1089     sum2  = 0.0;
1090     sum3  = 0.0;
1091     sum4  = 0.0;
1092     sum5  = 0.0;
1093     sum6  = 0.0;
1094     sum7  = 0.0;
1095 
1096     nonzerorow += (n>0);
1097     for (j=0; j<n; j++) {
1098       sum1 += v[jrow]*x[7*idx[jrow]];
1099       sum2 += v[jrow]*x[7*idx[jrow]+1];
1100       sum3 += v[jrow]*x[7*idx[jrow]+2];
1101       sum4 += v[jrow]*x[7*idx[jrow]+3];
1102       sum5 += v[jrow]*x[7*idx[jrow]+4];
1103       sum6 += v[jrow]*x[7*idx[jrow]+5];
1104       sum7 += v[jrow]*x[7*idx[jrow]+6];
1105       jrow++;
1106     }
1107     y[7*i]   = sum1;
1108     y[7*i+1] = sum2;
1109     y[7*i+2] = sum3;
1110     y[7*i+3] = sum4;
1111     y[7*i+4] = sum5;
1112     y[7*i+5] = sum6;
1113     y[7*i+6] = sum7;
1114   }
1115 
1116   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1117   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1118   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1124 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1125 {
1126   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1127   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1128   const PetscScalar *x,*v;
1129   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1130   PetscErrorCode    ierr;
1131   const PetscInt    m = b->AIJ->rmap->n,*idx;
1132   PetscInt          n,i;
1133 
1134   PetscFunctionBegin;
1135   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1136   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1137   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1138 
1139   for (i=0; i<m; i++) {
1140     idx    = a->j + a->i[i];
1141     v      = a->a + a->i[i];
1142     n      = a->i[i+1] - a->i[i];
1143     alpha1 = x[7*i];
1144     alpha2 = x[7*i+1];
1145     alpha3 = x[7*i+2];
1146     alpha4 = x[7*i+3];
1147     alpha5 = x[7*i+4];
1148     alpha6 = x[7*i+5];
1149     alpha7 = x[7*i+6];
1150     while (n-->0) {
1151       y[7*(*idx)]   += alpha1*(*v);
1152       y[7*(*idx)+1] += alpha2*(*v);
1153       y[7*(*idx)+2] += alpha3*(*v);
1154       y[7*(*idx)+3] += alpha4*(*v);
1155       y[7*(*idx)+4] += alpha5*(*v);
1156       y[7*(*idx)+5] += alpha6*(*v);
1157       y[7*(*idx)+6] += alpha7*(*v);
1158       idx++; v++;
1159     }
1160   }
1161   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1162   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1163   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1169 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1170 {
1171   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1172   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1173   const PetscScalar *x,*v;
1174   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1175   PetscErrorCode    ierr;
1176   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1177   PetscInt          n,i,jrow,j;
1178 
1179   PetscFunctionBegin;
1180   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1181   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1182   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1183   idx  = a->j;
1184   v    = a->a;
1185   ii   = a->i;
1186 
1187   for (i=0; i<m; i++) {
1188     jrow = ii[i];
1189     n    = ii[i+1] - jrow;
1190     sum1 = 0.0;
1191     sum2 = 0.0;
1192     sum3 = 0.0;
1193     sum4 = 0.0;
1194     sum5 = 0.0;
1195     sum6 = 0.0;
1196     sum7 = 0.0;
1197     for (j=0; j<n; j++) {
1198       sum1 += v[jrow]*x[7*idx[jrow]];
1199       sum2 += v[jrow]*x[7*idx[jrow]+1];
1200       sum3 += v[jrow]*x[7*idx[jrow]+2];
1201       sum4 += v[jrow]*x[7*idx[jrow]+3];
1202       sum5 += v[jrow]*x[7*idx[jrow]+4];
1203       sum6 += v[jrow]*x[7*idx[jrow]+5];
1204       sum7 += v[jrow]*x[7*idx[jrow]+6];
1205       jrow++;
1206     }
1207     y[7*i]   += sum1;
1208     y[7*i+1] += sum2;
1209     y[7*i+2] += sum3;
1210     y[7*i+3] += sum4;
1211     y[7*i+4] += sum5;
1212     y[7*i+5] += sum6;
1213     y[7*i+6] += sum7;
1214   }
1215 
1216   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1217   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1218   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1224 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1225 {
1226   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1227   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1228   const PetscScalar *x,*v;
1229   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1230   PetscErrorCode    ierr;
1231   const PetscInt    m = b->AIJ->rmap->n,*idx;
1232   PetscInt          n,i;
1233 
1234   PetscFunctionBegin;
1235   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1236   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1237   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1238   for (i=0; i<m; i++) {
1239     idx    = a->j + a->i[i];
1240     v      = a->a + a->i[i];
1241     n      = a->i[i+1] - a->i[i];
1242     alpha1 = x[7*i];
1243     alpha2 = x[7*i+1];
1244     alpha3 = x[7*i+2];
1245     alpha4 = x[7*i+3];
1246     alpha5 = x[7*i+4];
1247     alpha6 = x[7*i+5];
1248     alpha7 = x[7*i+6];
1249     while (n-->0) {
1250       y[7*(*idx)]   += alpha1*(*v);
1251       y[7*(*idx)+1] += alpha2*(*v);
1252       y[7*(*idx)+2] += alpha3*(*v);
1253       y[7*(*idx)+3] += alpha4*(*v);
1254       y[7*(*idx)+4] += alpha5*(*v);
1255       y[7*(*idx)+5] += alpha6*(*v);
1256       y[7*(*idx)+6] += alpha7*(*v);
1257       idx++; v++;
1258     }
1259   }
1260   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1261   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1262   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1268 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1269 {
1270   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1271   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1272   const PetscScalar *x,*v;
1273   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1274   PetscErrorCode    ierr;
1275   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1276   PetscInt          nonzerorow=0,n,i,jrow,j;
1277 
1278   PetscFunctionBegin;
1279   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1280   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1281   idx  = a->j;
1282   v    = a->a;
1283   ii   = a->i;
1284 
1285   for (i=0; i<m; i++) {
1286     jrow  = ii[i];
1287     n     = ii[i+1] - jrow;
1288     sum1  = 0.0;
1289     sum2  = 0.0;
1290     sum3  = 0.0;
1291     sum4  = 0.0;
1292     sum5  = 0.0;
1293     sum6  = 0.0;
1294     sum7  = 0.0;
1295     sum8  = 0.0;
1296 
1297     nonzerorow += (n>0);
1298     for (j=0; j<n; j++) {
1299       sum1 += v[jrow]*x[8*idx[jrow]];
1300       sum2 += v[jrow]*x[8*idx[jrow]+1];
1301       sum3 += v[jrow]*x[8*idx[jrow]+2];
1302       sum4 += v[jrow]*x[8*idx[jrow]+3];
1303       sum5 += v[jrow]*x[8*idx[jrow]+4];
1304       sum6 += v[jrow]*x[8*idx[jrow]+5];
1305       sum7 += v[jrow]*x[8*idx[jrow]+6];
1306       sum8 += v[jrow]*x[8*idx[jrow]+7];
1307       jrow++;
1308     }
1309     y[8*i]   = sum1;
1310     y[8*i+1] = sum2;
1311     y[8*i+2] = sum3;
1312     y[8*i+3] = sum4;
1313     y[8*i+4] = sum5;
1314     y[8*i+5] = sum6;
1315     y[8*i+6] = sum7;
1316     y[8*i+7] = sum8;
1317   }
1318 
1319   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1320   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1321   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1327 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1328 {
1329   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1330   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1331   const PetscScalar *x,*v;
1332   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1333   PetscErrorCode    ierr;
1334   const PetscInt    m = b->AIJ->rmap->n,*idx;
1335   PetscInt          n,i;
1336 
1337   PetscFunctionBegin;
1338   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1339   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1340   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1341 
1342   for (i=0; i<m; i++) {
1343     idx    = a->j + a->i[i];
1344     v      = a->a + a->i[i];
1345     n      = a->i[i+1] - a->i[i];
1346     alpha1 = x[8*i];
1347     alpha2 = x[8*i+1];
1348     alpha3 = x[8*i+2];
1349     alpha4 = x[8*i+3];
1350     alpha5 = x[8*i+4];
1351     alpha6 = x[8*i+5];
1352     alpha7 = x[8*i+6];
1353     alpha8 = x[8*i+7];
1354     while (n-->0) {
1355       y[8*(*idx)]   += alpha1*(*v);
1356       y[8*(*idx)+1] += alpha2*(*v);
1357       y[8*(*idx)+2] += alpha3*(*v);
1358       y[8*(*idx)+3] += alpha4*(*v);
1359       y[8*(*idx)+4] += alpha5*(*v);
1360       y[8*(*idx)+5] += alpha6*(*v);
1361       y[8*(*idx)+6] += alpha7*(*v);
1362       y[8*(*idx)+7] += alpha8*(*v);
1363       idx++; v++;
1364     }
1365   }
1366   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1367   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1368   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1374 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1375 {
1376   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1377   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1378   const PetscScalar *x,*v;
1379   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1380   PetscErrorCode    ierr;
1381   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1382   PetscInt          n,i,jrow,j;
1383 
1384   PetscFunctionBegin;
1385   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1386   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1387   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1388   idx  = a->j;
1389   v    = a->a;
1390   ii   = a->i;
1391 
1392   for (i=0; i<m; i++) {
1393     jrow = ii[i];
1394     n    = ii[i+1] - jrow;
1395     sum1 = 0.0;
1396     sum2 = 0.0;
1397     sum3 = 0.0;
1398     sum4 = 0.0;
1399     sum5 = 0.0;
1400     sum6 = 0.0;
1401     sum7 = 0.0;
1402     sum8 = 0.0;
1403     for (j=0; j<n; j++) {
1404       sum1 += v[jrow]*x[8*idx[jrow]];
1405       sum2 += v[jrow]*x[8*idx[jrow]+1];
1406       sum3 += v[jrow]*x[8*idx[jrow]+2];
1407       sum4 += v[jrow]*x[8*idx[jrow]+3];
1408       sum5 += v[jrow]*x[8*idx[jrow]+4];
1409       sum6 += v[jrow]*x[8*idx[jrow]+5];
1410       sum7 += v[jrow]*x[8*idx[jrow]+6];
1411       sum8 += v[jrow]*x[8*idx[jrow]+7];
1412       jrow++;
1413     }
1414     y[8*i]   += sum1;
1415     y[8*i+1] += sum2;
1416     y[8*i+2] += sum3;
1417     y[8*i+3] += sum4;
1418     y[8*i+4] += sum5;
1419     y[8*i+5] += sum6;
1420     y[8*i+6] += sum7;
1421     y[8*i+7] += sum8;
1422   }
1423 
1424   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1425   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1426   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1432 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1433 {
1434   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1435   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1436   const PetscScalar *x,*v;
1437   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1438   PetscErrorCode    ierr;
1439   const PetscInt    m = b->AIJ->rmap->n,*idx;
1440   PetscInt          n,i;
1441 
1442   PetscFunctionBegin;
1443   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1444   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1445   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1446   for (i=0; i<m; i++) {
1447     idx    = a->j + a->i[i];
1448     v      = a->a + a->i[i];
1449     n      = a->i[i+1] - a->i[i];
1450     alpha1 = x[8*i];
1451     alpha2 = x[8*i+1];
1452     alpha3 = x[8*i+2];
1453     alpha4 = x[8*i+3];
1454     alpha5 = x[8*i+4];
1455     alpha6 = x[8*i+5];
1456     alpha7 = x[8*i+6];
1457     alpha8 = x[8*i+7];
1458     while (n-->0) {
1459       y[8*(*idx)]   += alpha1*(*v);
1460       y[8*(*idx)+1] += alpha2*(*v);
1461       y[8*(*idx)+2] += alpha3*(*v);
1462       y[8*(*idx)+3] += alpha4*(*v);
1463       y[8*(*idx)+4] += alpha5*(*v);
1464       y[8*(*idx)+5] += alpha6*(*v);
1465       y[8*(*idx)+6] += alpha7*(*v);
1466       y[8*(*idx)+7] += alpha8*(*v);
1467       idx++; v++;
1468     }
1469   }
1470   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1471   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1472   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 /* ------------------------------------------------------------------------------*/
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1479 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1480 {
1481   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1482   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1483   const PetscScalar *x,*v;
1484   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1485   PetscErrorCode    ierr;
1486   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1487   PetscInt          nonzerorow=0,n,i,jrow,j;
1488 
1489   PetscFunctionBegin;
1490   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1491   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1492   idx  = a->j;
1493   v    = a->a;
1494   ii   = a->i;
1495 
1496   for (i=0; i<m; i++) {
1497     jrow  = ii[i];
1498     n     = ii[i+1] - jrow;
1499     sum1  = 0.0;
1500     sum2  = 0.0;
1501     sum3  = 0.0;
1502     sum4  = 0.0;
1503     sum5  = 0.0;
1504     sum6  = 0.0;
1505     sum7  = 0.0;
1506     sum8  = 0.0;
1507     sum9  = 0.0;
1508 
1509     nonzerorow += (n>0);
1510     for (j=0; j<n; j++) {
1511       sum1 += v[jrow]*x[9*idx[jrow]];
1512       sum2 += v[jrow]*x[9*idx[jrow]+1];
1513       sum3 += v[jrow]*x[9*idx[jrow]+2];
1514       sum4 += v[jrow]*x[9*idx[jrow]+3];
1515       sum5 += v[jrow]*x[9*idx[jrow]+4];
1516       sum6 += v[jrow]*x[9*idx[jrow]+5];
1517       sum7 += v[jrow]*x[9*idx[jrow]+6];
1518       sum8 += v[jrow]*x[9*idx[jrow]+7];
1519       sum9 += v[jrow]*x[9*idx[jrow]+8];
1520       jrow++;
1521     }
1522     y[9*i]   = sum1;
1523     y[9*i+1] = sum2;
1524     y[9*i+2] = sum3;
1525     y[9*i+3] = sum4;
1526     y[9*i+4] = sum5;
1527     y[9*i+5] = sum6;
1528     y[9*i+6] = sum7;
1529     y[9*i+7] = sum8;
1530     y[9*i+8] = sum9;
1531   }
1532 
1533   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1534   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1535   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 /* ------------------------------------------------------------------------------*/
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1543 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1544 {
1545   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1546   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1547   const PetscScalar *x,*v;
1548   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1549   PetscErrorCode    ierr;
1550   const PetscInt    m = b->AIJ->rmap->n,*idx;
1551   PetscInt          n,i;
1552 
1553   PetscFunctionBegin;
1554   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1555   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1556   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1557 
1558   for (i=0; i<m; i++) {
1559     idx    = a->j + a->i[i];
1560     v      = a->a + a->i[i];
1561     n      = a->i[i+1] - a->i[i];
1562     alpha1 = x[9*i];
1563     alpha2 = x[9*i+1];
1564     alpha3 = x[9*i+2];
1565     alpha4 = x[9*i+3];
1566     alpha5 = x[9*i+4];
1567     alpha6 = x[9*i+5];
1568     alpha7 = x[9*i+6];
1569     alpha8 = x[9*i+7];
1570     alpha9 = x[9*i+8];
1571     while (n-->0) {
1572       y[9*(*idx)]   += alpha1*(*v);
1573       y[9*(*idx)+1] += alpha2*(*v);
1574       y[9*(*idx)+2] += alpha3*(*v);
1575       y[9*(*idx)+3] += alpha4*(*v);
1576       y[9*(*idx)+4] += alpha5*(*v);
1577       y[9*(*idx)+5] += alpha6*(*v);
1578       y[9*(*idx)+6] += alpha7*(*v);
1579       y[9*(*idx)+7] += alpha8*(*v);
1580       y[9*(*idx)+8] += alpha9*(*v);
1581       idx++; v++;
1582     }
1583   }
1584   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1585   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1586   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1592 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1593 {
1594   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1595   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1596   const PetscScalar *x,*v;
1597   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1598   PetscErrorCode    ierr;
1599   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1600   PetscInt          n,i,jrow,j;
1601 
1602   PetscFunctionBegin;
1603   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1604   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1605   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1606   idx  = a->j;
1607   v    = a->a;
1608   ii   = a->i;
1609 
1610   for (i=0; i<m; i++) {
1611     jrow = ii[i];
1612     n    = ii[i+1] - jrow;
1613     sum1 = 0.0;
1614     sum2 = 0.0;
1615     sum3 = 0.0;
1616     sum4 = 0.0;
1617     sum5 = 0.0;
1618     sum6 = 0.0;
1619     sum7 = 0.0;
1620     sum8 = 0.0;
1621     sum9 = 0.0;
1622     for (j=0; j<n; j++) {
1623       sum1 += v[jrow]*x[9*idx[jrow]];
1624       sum2 += v[jrow]*x[9*idx[jrow]+1];
1625       sum3 += v[jrow]*x[9*idx[jrow]+2];
1626       sum4 += v[jrow]*x[9*idx[jrow]+3];
1627       sum5 += v[jrow]*x[9*idx[jrow]+4];
1628       sum6 += v[jrow]*x[9*idx[jrow]+5];
1629       sum7 += v[jrow]*x[9*idx[jrow]+6];
1630       sum8 += v[jrow]*x[9*idx[jrow]+7];
1631       sum9 += v[jrow]*x[9*idx[jrow]+8];
1632       jrow++;
1633     }
1634     y[9*i]   += sum1;
1635     y[9*i+1] += sum2;
1636     y[9*i+2] += sum3;
1637     y[9*i+3] += sum4;
1638     y[9*i+4] += sum5;
1639     y[9*i+5] += sum6;
1640     y[9*i+6] += sum7;
1641     y[9*i+7] += sum8;
1642     y[9*i+8] += sum9;
1643   }
1644 
1645   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1646   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1647   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1653 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1654 {
1655   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1656   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1657   const PetscScalar *x,*v;
1658   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1659   PetscErrorCode    ierr;
1660   const PetscInt    m = b->AIJ->rmap->n,*idx;
1661   PetscInt          n,i;
1662 
1663   PetscFunctionBegin;
1664   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1665   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1666   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1667   for (i=0; i<m; i++) {
1668     idx    = a->j + a->i[i];
1669     v      = a->a + a->i[i];
1670     n      = a->i[i+1] - a->i[i];
1671     alpha1 = x[9*i];
1672     alpha2 = x[9*i+1];
1673     alpha3 = x[9*i+2];
1674     alpha4 = x[9*i+3];
1675     alpha5 = x[9*i+4];
1676     alpha6 = x[9*i+5];
1677     alpha7 = x[9*i+6];
1678     alpha8 = x[9*i+7];
1679     alpha9 = x[9*i+8];
1680     while (n-->0) {
1681       y[9*(*idx)]   += alpha1*(*v);
1682       y[9*(*idx)+1] += alpha2*(*v);
1683       y[9*(*idx)+2] += alpha3*(*v);
1684       y[9*(*idx)+3] += alpha4*(*v);
1685       y[9*(*idx)+4] += alpha5*(*v);
1686       y[9*(*idx)+5] += alpha6*(*v);
1687       y[9*(*idx)+6] += alpha7*(*v);
1688       y[9*(*idx)+7] += alpha8*(*v);
1689       y[9*(*idx)+8] += alpha9*(*v);
1690       idx++; v++;
1691     }
1692   }
1693   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1694   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1695   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1700 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1701 {
1702   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1703   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1704   const PetscScalar *x,*v;
1705   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1706   PetscErrorCode    ierr;
1707   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1708   PetscInt          nonzerorow=0,n,i,jrow,j;
1709 
1710   PetscFunctionBegin;
1711   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1712   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1713   idx  = a->j;
1714   v    = a->a;
1715   ii   = a->i;
1716 
1717   for (i=0; i<m; i++) {
1718     jrow  = ii[i];
1719     n     = ii[i+1] - jrow;
1720     sum1  = 0.0;
1721     sum2  = 0.0;
1722     sum3  = 0.0;
1723     sum4  = 0.0;
1724     sum5  = 0.0;
1725     sum6  = 0.0;
1726     sum7  = 0.0;
1727     sum8  = 0.0;
1728     sum9  = 0.0;
1729     sum10 = 0.0;
1730 
1731     nonzerorow += (n>0);
1732     for (j=0; j<n; j++) {
1733       sum1  += v[jrow]*x[10*idx[jrow]];
1734       sum2  += v[jrow]*x[10*idx[jrow]+1];
1735       sum3  += v[jrow]*x[10*idx[jrow]+2];
1736       sum4  += v[jrow]*x[10*idx[jrow]+3];
1737       sum5  += v[jrow]*x[10*idx[jrow]+4];
1738       sum6  += v[jrow]*x[10*idx[jrow]+5];
1739       sum7  += v[jrow]*x[10*idx[jrow]+6];
1740       sum8  += v[jrow]*x[10*idx[jrow]+7];
1741       sum9  += v[jrow]*x[10*idx[jrow]+8];
1742       sum10 += v[jrow]*x[10*idx[jrow]+9];
1743       jrow++;
1744     }
1745     y[10*i]   = sum1;
1746     y[10*i+1] = sum2;
1747     y[10*i+2] = sum3;
1748     y[10*i+3] = sum4;
1749     y[10*i+4] = sum5;
1750     y[10*i+5] = sum6;
1751     y[10*i+6] = sum7;
1752     y[10*i+7] = sum8;
1753     y[10*i+8] = sum9;
1754     y[10*i+9] = sum10;
1755   }
1756 
1757   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1758   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1759   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1765 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1766 {
1767   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1768   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1769   const PetscScalar *x,*v;
1770   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1771   PetscErrorCode    ierr;
1772   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1773   PetscInt          n,i,jrow,j;
1774 
1775   PetscFunctionBegin;
1776   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1777   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1778   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1779   idx  = a->j;
1780   v    = a->a;
1781   ii   = a->i;
1782 
1783   for (i=0; i<m; i++) {
1784     jrow  = ii[i];
1785     n     = ii[i+1] - jrow;
1786     sum1  = 0.0;
1787     sum2  = 0.0;
1788     sum3  = 0.0;
1789     sum4  = 0.0;
1790     sum5  = 0.0;
1791     sum6  = 0.0;
1792     sum7  = 0.0;
1793     sum8  = 0.0;
1794     sum9  = 0.0;
1795     sum10 = 0.0;
1796     for (j=0; j<n; j++) {
1797       sum1  += v[jrow]*x[10*idx[jrow]];
1798       sum2  += v[jrow]*x[10*idx[jrow]+1];
1799       sum3  += v[jrow]*x[10*idx[jrow]+2];
1800       sum4  += v[jrow]*x[10*idx[jrow]+3];
1801       sum5  += v[jrow]*x[10*idx[jrow]+4];
1802       sum6  += v[jrow]*x[10*idx[jrow]+5];
1803       sum7  += v[jrow]*x[10*idx[jrow]+6];
1804       sum8  += v[jrow]*x[10*idx[jrow]+7];
1805       sum9  += v[jrow]*x[10*idx[jrow]+8];
1806       sum10 += v[jrow]*x[10*idx[jrow]+9];
1807       jrow++;
1808     }
1809     y[10*i]   += sum1;
1810     y[10*i+1] += sum2;
1811     y[10*i+2] += sum3;
1812     y[10*i+3] += sum4;
1813     y[10*i+4] += sum5;
1814     y[10*i+5] += sum6;
1815     y[10*i+6] += sum7;
1816     y[10*i+7] += sum8;
1817     y[10*i+8] += sum9;
1818     y[10*i+9] += sum10;
1819   }
1820 
1821   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1822   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1823   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1829 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1830 {
1831   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1832   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1833   const PetscScalar *x,*v;
1834   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1835   PetscErrorCode    ierr;
1836   const PetscInt    m = b->AIJ->rmap->n,*idx;
1837   PetscInt          n,i;
1838 
1839   PetscFunctionBegin;
1840   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1841   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1842   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1843 
1844   for (i=0; i<m; i++) {
1845     idx     = a->j + a->i[i];
1846     v       = a->a + a->i[i];
1847     n       = a->i[i+1] - a->i[i];
1848     alpha1  = x[10*i];
1849     alpha2  = x[10*i+1];
1850     alpha3  = x[10*i+2];
1851     alpha4  = x[10*i+3];
1852     alpha5  = x[10*i+4];
1853     alpha6  = x[10*i+5];
1854     alpha7  = x[10*i+6];
1855     alpha8  = x[10*i+7];
1856     alpha9  = x[10*i+8];
1857     alpha10 = x[10*i+9];
1858     while (n-->0) {
1859       y[10*(*idx)]   += alpha1*(*v);
1860       y[10*(*idx)+1] += alpha2*(*v);
1861       y[10*(*idx)+2] += alpha3*(*v);
1862       y[10*(*idx)+3] += alpha4*(*v);
1863       y[10*(*idx)+4] += alpha5*(*v);
1864       y[10*(*idx)+5] += alpha6*(*v);
1865       y[10*(*idx)+6] += alpha7*(*v);
1866       y[10*(*idx)+7] += alpha8*(*v);
1867       y[10*(*idx)+8] += alpha9*(*v);
1868       y[10*(*idx)+9] += alpha10*(*v);
1869       idx++; v++;
1870     }
1871   }
1872   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1873   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1874   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1880 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1881 {
1882   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1883   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1884   const PetscScalar *x,*v;
1885   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1886   PetscErrorCode    ierr;
1887   const PetscInt    m = b->AIJ->rmap->n,*idx;
1888   PetscInt          n,i;
1889 
1890   PetscFunctionBegin;
1891   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1892   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1893   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1894   for (i=0; i<m; i++) {
1895     idx     = a->j + a->i[i];
1896     v       = a->a + a->i[i];
1897     n       = a->i[i+1] - a->i[i];
1898     alpha1  = x[10*i];
1899     alpha2  = x[10*i+1];
1900     alpha3  = x[10*i+2];
1901     alpha4  = x[10*i+3];
1902     alpha5  = x[10*i+4];
1903     alpha6  = x[10*i+5];
1904     alpha7  = x[10*i+6];
1905     alpha8  = x[10*i+7];
1906     alpha9  = x[10*i+8];
1907     alpha10 = x[10*i+9];
1908     while (n-->0) {
1909       y[10*(*idx)]   += alpha1*(*v);
1910       y[10*(*idx)+1] += alpha2*(*v);
1911       y[10*(*idx)+2] += alpha3*(*v);
1912       y[10*(*idx)+3] += alpha4*(*v);
1913       y[10*(*idx)+4] += alpha5*(*v);
1914       y[10*(*idx)+5] += alpha6*(*v);
1915       y[10*(*idx)+6] += alpha7*(*v);
1916       y[10*(*idx)+7] += alpha8*(*v);
1917       y[10*(*idx)+8] += alpha9*(*v);
1918       y[10*(*idx)+9] += alpha10*(*v);
1919       idx++; v++;
1920     }
1921   }
1922   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1923   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1924   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 
1929 /*--------------------------------------------------------------------------------------------*/
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatMult_SeqMAIJ_11"
1932 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1933 {
1934   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1935   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1936   const PetscScalar *x,*v;
1937   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1938   PetscErrorCode    ierr;
1939   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1940   PetscInt          nonzerorow=0,n,i,jrow,j;
1941 
1942   PetscFunctionBegin;
1943   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1944   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1945   idx  = a->j;
1946   v    = a->a;
1947   ii   = a->i;
1948 
1949   for (i=0; i<m; i++) {
1950     jrow  = ii[i];
1951     n     = ii[i+1] - jrow;
1952     sum1  = 0.0;
1953     sum2  = 0.0;
1954     sum3  = 0.0;
1955     sum4  = 0.0;
1956     sum5  = 0.0;
1957     sum6  = 0.0;
1958     sum7  = 0.0;
1959     sum8  = 0.0;
1960     sum9  = 0.0;
1961     sum10 = 0.0;
1962     sum11 = 0.0;
1963 
1964     nonzerorow += (n>0);
1965     for (j=0; j<n; j++) {
1966       sum1  += v[jrow]*x[11*idx[jrow]];
1967       sum2  += v[jrow]*x[11*idx[jrow]+1];
1968       sum3  += v[jrow]*x[11*idx[jrow]+2];
1969       sum4  += v[jrow]*x[11*idx[jrow]+3];
1970       sum5  += v[jrow]*x[11*idx[jrow]+4];
1971       sum6  += v[jrow]*x[11*idx[jrow]+5];
1972       sum7  += v[jrow]*x[11*idx[jrow]+6];
1973       sum8  += v[jrow]*x[11*idx[jrow]+7];
1974       sum9  += v[jrow]*x[11*idx[jrow]+8];
1975       sum10 += v[jrow]*x[11*idx[jrow]+9];
1976       sum11 += v[jrow]*x[11*idx[jrow]+10];
1977       jrow++;
1978     }
1979     y[11*i]    = sum1;
1980     y[11*i+1]  = sum2;
1981     y[11*i+2]  = sum3;
1982     y[11*i+3]  = sum4;
1983     y[11*i+4]  = sum5;
1984     y[11*i+5]  = sum6;
1985     y[11*i+6]  = sum7;
1986     y[11*i+7]  = sum8;
1987     y[11*i+8]  = sum9;
1988     y[11*i+9]  = sum10;
1989     y[11*i+10] = sum11;
1990   }
1991 
1992   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1993   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1994   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
2000 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2001 {
2002   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2003   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2004   const PetscScalar *x,*v;
2005   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2006   PetscErrorCode    ierr;
2007   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2008   PetscInt          n,i,jrow,j;
2009 
2010   PetscFunctionBegin;
2011   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2012   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2013   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2014   idx  = a->j;
2015   v    = a->a;
2016   ii   = a->i;
2017 
2018   for (i=0; i<m; i++) {
2019     jrow  = ii[i];
2020     n     = ii[i+1] - jrow;
2021     sum1  = 0.0;
2022     sum2  = 0.0;
2023     sum3  = 0.0;
2024     sum4  = 0.0;
2025     sum5  = 0.0;
2026     sum6  = 0.0;
2027     sum7  = 0.0;
2028     sum8  = 0.0;
2029     sum9  = 0.0;
2030     sum10 = 0.0;
2031     sum11 = 0.0;
2032     for (j=0; j<n; j++) {
2033       sum1  += v[jrow]*x[11*idx[jrow]];
2034       sum2  += v[jrow]*x[11*idx[jrow]+1];
2035       sum3  += v[jrow]*x[11*idx[jrow]+2];
2036       sum4  += v[jrow]*x[11*idx[jrow]+3];
2037       sum5  += v[jrow]*x[11*idx[jrow]+4];
2038       sum6  += v[jrow]*x[11*idx[jrow]+5];
2039       sum7  += v[jrow]*x[11*idx[jrow]+6];
2040       sum8  += v[jrow]*x[11*idx[jrow]+7];
2041       sum9  += v[jrow]*x[11*idx[jrow]+8];
2042       sum10 += v[jrow]*x[11*idx[jrow]+9];
2043       sum11 += v[jrow]*x[11*idx[jrow]+10];
2044       jrow++;
2045     }
2046     y[11*i]    += sum1;
2047     y[11*i+1]  += sum2;
2048     y[11*i+2]  += sum3;
2049     y[11*i+3]  += sum4;
2050     y[11*i+4]  += sum5;
2051     y[11*i+5]  += sum6;
2052     y[11*i+6]  += sum7;
2053     y[11*i+7]  += sum8;
2054     y[11*i+8]  += sum9;
2055     y[11*i+9]  += sum10;
2056     y[11*i+10] += sum11;
2057   }
2058 
2059   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2060   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2061   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2067 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2068 {
2069   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2070   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2071   const PetscScalar *x,*v;
2072   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2073   PetscErrorCode    ierr;
2074   const PetscInt    m = b->AIJ->rmap->n,*idx;
2075   PetscInt          n,i;
2076 
2077   PetscFunctionBegin;
2078   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2079   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2080   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2081 
2082   for (i=0; i<m; i++) {
2083     idx     = a->j + a->i[i];
2084     v       = a->a + a->i[i];
2085     n       = a->i[i+1] - a->i[i];
2086     alpha1  = x[11*i];
2087     alpha2  = x[11*i+1];
2088     alpha3  = x[11*i+2];
2089     alpha4  = x[11*i+3];
2090     alpha5  = x[11*i+4];
2091     alpha6  = x[11*i+5];
2092     alpha7  = x[11*i+6];
2093     alpha8  = x[11*i+7];
2094     alpha9  = x[11*i+8];
2095     alpha10 = x[11*i+9];
2096     alpha11 = x[11*i+10];
2097     while (n-->0) {
2098       y[11*(*idx)]    += alpha1*(*v);
2099       y[11*(*idx)+1]  += alpha2*(*v);
2100       y[11*(*idx)+2]  += alpha3*(*v);
2101       y[11*(*idx)+3]  += alpha4*(*v);
2102       y[11*(*idx)+4]  += alpha5*(*v);
2103       y[11*(*idx)+5]  += alpha6*(*v);
2104       y[11*(*idx)+6]  += alpha7*(*v);
2105       y[11*(*idx)+7]  += alpha8*(*v);
2106       y[11*(*idx)+8]  += alpha9*(*v);
2107       y[11*(*idx)+9]  += alpha10*(*v);
2108       y[11*(*idx)+10] += alpha11*(*v);
2109       idx++; v++;
2110     }
2111   }
2112   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2113   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2114   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2120 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2121 {
2122   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2123   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2124   const PetscScalar *x,*v;
2125   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2126   PetscErrorCode    ierr;
2127   const PetscInt    m = b->AIJ->rmap->n,*idx;
2128   PetscInt          n,i;
2129 
2130   PetscFunctionBegin;
2131   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2132   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2133   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2134   for (i=0; i<m; i++) {
2135     idx     = a->j + a->i[i];
2136     v       = a->a + a->i[i];
2137     n       = a->i[i+1] - a->i[i];
2138     alpha1  = x[11*i];
2139     alpha2  = x[11*i+1];
2140     alpha3  = x[11*i+2];
2141     alpha4  = x[11*i+3];
2142     alpha5  = x[11*i+4];
2143     alpha6  = x[11*i+5];
2144     alpha7  = x[11*i+6];
2145     alpha8  = x[11*i+7];
2146     alpha9  = x[11*i+8];
2147     alpha10 = x[11*i+9];
2148     alpha11 = x[11*i+10];
2149     while (n-->0) {
2150       y[11*(*idx)]    += alpha1*(*v);
2151       y[11*(*idx)+1]  += alpha2*(*v);
2152       y[11*(*idx)+2]  += alpha3*(*v);
2153       y[11*(*idx)+3]  += alpha4*(*v);
2154       y[11*(*idx)+4]  += alpha5*(*v);
2155       y[11*(*idx)+5]  += alpha6*(*v);
2156       y[11*(*idx)+6]  += alpha7*(*v);
2157       y[11*(*idx)+7]  += alpha8*(*v);
2158       y[11*(*idx)+8]  += alpha9*(*v);
2159       y[11*(*idx)+9]  += alpha10*(*v);
2160       y[11*(*idx)+10] += alpha11*(*v);
2161       idx++; v++;
2162     }
2163   }
2164   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2165   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2166   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 
2171 /*--------------------------------------------------------------------------------------------*/
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatMult_SeqMAIJ_16"
2174 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2175 {
2176   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2177   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2178   const PetscScalar *x,*v;
2179   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2180   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2181   PetscErrorCode    ierr;
2182   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2183   PetscInt          nonzerorow=0,n,i,jrow,j;
2184 
2185   PetscFunctionBegin;
2186   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2187   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2188   idx  = a->j;
2189   v    = a->a;
2190   ii   = a->i;
2191 
2192   for (i=0; i<m; i++) {
2193     jrow  = ii[i];
2194     n     = ii[i+1] - jrow;
2195     sum1  = 0.0;
2196     sum2  = 0.0;
2197     sum3  = 0.0;
2198     sum4  = 0.0;
2199     sum5  = 0.0;
2200     sum6  = 0.0;
2201     sum7  = 0.0;
2202     sum8  = 0.0;
2203     sum9  = 0.0;
2204     sum10 = 0.0;
2205     sum11 = 0.0;
2206     sum12 = 0.0;
2207     sum13 = 0.0;
2208     sum14 = 0.0;
2209     sum15 = 0.0;
2210     sum16 = 0.0;
2211 
2212     nonzerorow += (n>0);
2213     for (j=0; j<n; j++) {
2214       sum1  += v[jrow]*x[16*idx[jrow]];
2215       sum2  += v[jrow]*x[16*idx[jrow]+1];
2216       sum3  += v[jrow]*x[16*idx[jrow]+2];
2217       sum4  += v[jrow]*x[16*idx[jrow]+3];
2218       sum5  += v[jrow]*x[16*idx[jrow]+4];
2219       sum6  += v[jrow]*x[16*idx[jrow]+5];
2220       sum7  += v[jrow]*x[16*idx[jrow]+6];
2221       sum8  += v[jrow]*x[16*idx[jrow]+7];
2222       sum9  += v[jrow]*x[16*idx[jrow]+8];
2223       sum10 += v[jrow]*x[16*idx[jrow]+9];
2224       sum11 += v[jrow]*x[16*idx[jrow]+10];
2225       sum12 += v[jrow]*x[16*idx[jrow]+11];
2226       sum13 += v[jrow]*x[16*idx[jrow]+12];
2227       sum14 += v[jrow]*x[16*idx[jrow]+13];
2228       sum15 += v[jrow]*x[16*idx[jrow]+14];
2229       sum16 += v[jrow]*x[16*idx[jrow]+15];
2230       jrow++;
2231     }
2232     y[16*i]    = sum1;
2233     y[16*i+1]  = sum2;
2234     y[16*i+2]  = sum3;
2235     y[16*i+3]  = sum4;
2236     y[16*i+4]  = sum5;
2237     y[16*i+5]  = sum6;
2238     y[16*i+6]  = sum7;
2239     y[16*i+7]  = sum8;
2240     y[16*i+8]  = sum9;
2241     y[16*i+9]  = sum10;
2242     y[16*i+10] = sum11;
2243     y[16*i+11] = sum12;
2244     y[16*i+12] = sum13;
2245     y[16*i+13] = sum14;
2246     y[16*i+14] = sum15;
2247     y[16*i+15] = sum16;
2248   }
2249 
2250   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2251   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2252   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2258 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2259 {
2260   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2261   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2262   const PetscScalar *x,*v;
2263   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2264   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2265   PetscErrorCode    ierr;
2266   const PetscInt    m = b->AIJ->rmap->n,*idx;
2267   PetscInt          n,i;
2268 
2269   PetscFunctionBegin;
2270   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2271   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2272   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2273 
2274   for (i=0; i<m; i++) {
2275     idx     = a->j + a->i[i];
2276     v       = a->a + a->i[i];
2277     n       = a->i[i+1] - a->i[i];
2278     alpha1  = x[16*i];
2279     alpha2  = x[16*i+1];
2280     alpha3  = x[16*i+2];
2281     alpha4  = x[16*i+3];
2282     alpha5  = x[16*i+4];
2283     alpha6  = x[16*i+5];
2284     alpha7  = x[16*i+6];
2285     alpha8  = x[16*i+7];
2286     alpha9  = x[16*i+8];
2287     alpha10 = x[16*i+9];
2288     alpha11 = x[16*i+10];
2289     alpha12 = x[16*i+11];
2290     alpha13 = x[16*i+12];
2291     alpha14 = x[16*i+13];
2292     alpha15 = x[16*i+14];
2293     alpha16 = x[16*i+15];
2294     while (n-->0) {
2295       y[16*(*idx)]    += alpha1*(*v);
2296       y[16*(*idx)+1]  += alpha2*(*v);
2297       y[16*(*idx)+2]  += alpha3*(*v);
2298       y[16*(*idx)+3]  += alpha4*(*v);
2299       y[16*(*idx)+4]  += alpha5*(*v);
2300       y[16*(*idx)+5]  += alpha6*(*v);
2301       y[16*(*idx)+6]  += alpha7*(*v);
2302       y[16*(*idx)+7]  += alpha8*(*v);
2303       y[16*(*idx)+8]  += alpha9*(*v);
2304       y[16*(*idx)+9]  += alpha10*(*v);
2305       y[16*(*idx)+10] += alpha11*(*v);
2306       y[16*(*idx)+11] += alpha12*(*v);
2307       y[16*(*idx)+12] += alpha13*(*v);
2308       y[16*(*idx)+13] += alpha14*(*v);
2309       y[16*(*idx)+14] += alpha15*(*v);
2310       y[16*(*idx)+15] += alpha16*(*v);
2311       idx++; v++;
2312     }
2313   }
2314   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2315   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2316   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2322 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2323 {
2324   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2325   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2326   const PetscScalar *x,*v;
2327   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2328   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2329   PetscErrorCode    ierr;
2330   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2331   PetscInt          n,i,jrow,j;
2332 
2333   PetscFunctionBegin;
2334   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2335   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2336   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2337   idx  = a->j;
2338   v    = a->a;
2339   ii   = a->i;
2340 
2341   for (i=0; i<m; i++) {
2342     jrow  = ii[i];
2343     n     = ii[i+1] - jrow;
2344     sum1  = 0.0;
2345     sum2  = 0.0;
2346     sum3  = 0.0;
2347     sum4  = 0.0;
2348     sum5  = 0.0;
2349     sum6  = 0.0;
2350     sum7  = 0.0;
2351     sum8  = 0.0;
2352     sum9  = 0.0;
2353     sum10 = 0.0;
2354     sum11 = 0.0;
2355     sum12 = 0.0;
2356     sum13 = 0.0;
2357     sum14 = 0.0;
2358     sum15 = 0.0;
2359     sum16 = 0.0;
2360     for (j=0; j<n; j++) {
2361       sum1  += v[jrow]*x[16*idx[jrow]];
2362       sum2  += v[jrow]*x[16*idx[jrow]+1];
2363       sum3  += v[jrow]*x[16*idx[jrow]+2];
2364       sum4  += v[jrow]*x[16*idx[jrow]+3];
2365       sum5  += v[jrow]*x[16*idx[jrow]+4];
2366       sum6  += v[jrow]*x[16*idx[jrow]+5];
2367       sum7  += v[jrow]*x[16*idx[jrow]+6];
2368       sum8  += v[jrow]*x[16*idx[jrow]+7];
2369       sum9  += v[jrow]*x[16*idx[jrow]+8];
2370       sum10 += v[jrow]*x[16*idx[jrow]+9];
2371       sum11 += v[jrow]*x[16*idx[jrow]+10];
2372       sum12 += v[jrow]*x[16*idx[jrow]+11];
2373       sum13 += v[jrow]*x[16*idx[jrow]+12];
2374       sum14 += v[jrow]*x[16*idx[jrow]+13];
2375       sum15 += v[jrow]*x[16*idx[jrow]+14];
2376       sum16 += v[jrow]*x[16*idx[jrow]+15];
2377       jrow++;
2378     }
2379     y[16*i]    += sum1;
2380     y[16*i+1]  += sum2;
2381     y[16*i+2]  += sum3;
2382     y[16*i+3]  += sum4;
2383     y[16*i+4]  += sum5;
2384     y[16*i+5]  += sum6;
2385     y[16*i+6]  += sum7;
2386     y[16*i+7]  += sum8;
2387     y[16*i+8]  += sum9;
2388     y[16*i+9]  += sum10;
2389     y[16*i+10] += sum11;
2390     y[16*i+11] += sum12;
2391     y[16*i+12] += sum13;
2392     y[16*i+13] += sum14;
2393     y[16*i+14] += sum15;
2394     y[16*i+15] += sum16;
2395   }
2396 
2397   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2398   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2399   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2405 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2406 {
2407   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2408   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2409   const PetscScalar *x,*v;
2410   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2411   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2412   PetscErrorCode    ierr;
2413   const PetscInt    m = b->AIJ->rmap->n,*idx;
2414   PetscInt          n,i;
2415 
2416   PetscFunctionBegin;
2417   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2418   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2419   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2420   for (i=0; i<m; i++) {
2421     idx     = a->j + a->i[i];
2422     v       = a->a + a->i[i];
2423     n       = a->i[i+1] - a->i[i];
2424     alpha1  = x[16*i];
2425     alpha2  = x[16*i+1];
2426     alpha3  = x[16*i+2];
2427     alpha4  = x[16*i+3];
2428     alpha5  = x[16*i+4];
2429     alpha6  = x[16*i+5];
2430     alpha7  = x[16*i+6];
2431     alpha8  = x[16*i+7];
2432     alpha9  = x[16*i+8];
2433     alpha10 = x[16*i+9];
2434     alpha11 = x[16*i+10];
2435     alpha12 = x[16*i+11];
2436     alpha13 = x[16*i+12];
2437     alpha14 = x[16*i+13];
2438     alpha15 = x[16*i+14];
2439     alpha16 = x[16*i+15];
2440     while (n-->0) {
2441       y[16*(*idx)]    += alpha1*(*v);
2442       y[16*(*idx)+1]  += alpha2*(*v);
2443       y[16*(*idx)+2]  += alpha3*(*v);
2444       y[16*(*idx)+3]  += alpha4*(*v);
2445       y[16*(*idx)+4]  += alpha5*(*v);
2446       y[16*(*idx)+5]  += alpha6*(*v);
2447       y[16*(*idx)+6]  += alpha7*(*v);
2448       y[16*(*idx)+7]  += alpha8*(*v);
2449       y[16*(*idx)+8]  += alpha9*(*v);
2450       y[16*(*idx)+9]  += alpha10*(*v);
2451       y[16*(*idx)+10] += alpha11*(*v);
2452       y[16*(*idx)+11] += alpha12*(*v);
2453       y[16*(*idx)+12] += alpha13*(*v);
2454       y[16*(*idx)+13] += alpha14*(*v);
2455       y[16*(*idx)+14] += alpha15*(*v);
2456       y[16*(*idx)+15] += alpha16*(*v);
2457       idx++; v++;
2458     }
2459   }
2460   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2461   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 /*--------------------------------------------------------------------------------------------*/
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2469 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2470 {
2471   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2472   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2473   const PetscScalar *x,*v;
2474   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2475   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2476   PetscErrorCode    ierr;
2477   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2478   PetscInt          nonzerorow=0,n,i,jrow,j;
2479 
2480   PetscFunctionBegin;
2481   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2482   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2483   idx  = a->j;
2484   v    = a->a;
2485   ii   = a->i;
2486 
2487   for (i=0; i<m; i++) {
2488     jrow  = ii[i];
2489     n     = ii[i+1] - jrow;
2490     sum1  = 0.0;
2491     sum2  = 0.0;
2492     sum3  = 0.0;
2493     sum4  = 0.0;
2494     sum5  = 0.0;
2495     sum6  = 0.0;
2496     sum7  = 0.0;
2497     sum8  = 0.0;
2498     sum9  = 0.0;
2499     sum10 = 0.0;
2500     sum11 = 0.0;
2501     sum12 = 0.0;
2502     sum13 = 0.0;
2503     sum14 = 0.0;
2504     sum15 = 0.0;
2505     sum16 = 0.0;
2506     sum17 = 0.0;
2507     sum18 = 0.0;
2508 
2509     nonzerorow += (n>0);
2510     for (j=0; j<n; j++) {
2511       sum1  += v[jrow]*x[18*idx[jrow]];
2512       sum2  += v[jrow]*x[18*idx[jrow]+1];
2513       sum3  += v[jrow]*x[18*idx[jrow]+2];
2514       sum4  += v[jrow]*x[18*idx[jrow]+3];
2515       sum5  += v[jrow]*x[18*idx[jrow]+4];
2516       sum6  += v[jrow]*x[18*idx[jrow]+5];
2517       sum7  += v[jrow]*x[18*idx[jrow]+6];
2518       sum8  += v[jrow]*x[18*idx[jrow]+7];
2519       sum9  += v[jrow]*x[18*idx[jrow]+8];
2520       sum10 += v[jrow]*x[18*idx[jrow]+9];
2521       sum11 += v[jrow]*x[18*idx[jrow]+10];
2522       sum12 += v[jrow]*x[18*idx[jrow]+11];
2523       sum13 += v[jrow]*x[18*idx[jrow]+12];
2524       sum14 += v[jrow]*x[18*idx[jrow]+13];
2525       sum15 += v[jrow]*x[18*idx[jrow]+14];
2526       sum16 += v[jrow]*x[18*idx[jrow]+15];
2527       sum17 += v[jrow]*x[18*idx[jrow]+16];
2528       sum18 += v[jrow]*x[18*idx[jrow]+17];
2529       jrow++;
2530     }
2531     y[18*i]    = sum1;
2532     y[18*i+1]  = sum2;
2533     y[18*i+2]  = sum3;
2534     y[18*i+3]  = sum4;
2535     y[18*i+4]  = sum5;
2536     y[18*i+5]  = sum6;
2537     y[18*i+6]  = sum7;
2538     y[18*i+7]  = sum8;
2539     y[18*i+8]  = sum9;
2540     y[18*i+9]  = sum10;
2541     y[18*i+10] = sum11;
2542     y[18*i+11] = sum12;
2543     y[18*i+12] = sum13;
2544     y[18*i+13] = sum14;
2545     y[18*i+14] = sum15;
2546     y[18*i+15] = sum16;
2547     y[18*i+16] = sum17;
2548     y[18*i+17] = sum18;
2549   }
2550 
2551   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2552   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2553   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2559 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2560 {
2561   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2562   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2563   const PetscScalar *x,*v;
2564   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2565   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2566   PetscErrorCode    ierr;
2567   const PetscInt    m = b->AIJ->rmap->n,*idx;
2568   PetscInt          n,i;
2569 
2570   PetscFunctionBegin;
2571   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2572   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2573   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2574 
2575   for (i=0; i<m; i++) {
2576     idx     = a->j + a->i[i];
2577     v       = a->a + a->i[i];
2578     n       = a->i[i+1] - a->i[i];
2579     alpha1  = x[18*i];
2580     alpha2  = x[18*i+1];
2581     alpha3  = x[18*i+2];
2582     alpha4  = x[18*i+3];
2583     alpha5  = x[18*i+4];
2584     alpha6  = x[18*i+5];
2585     alpha7  = x[18*i+6];
2586     alpha8  = x[18*i+7];
2587     alpha9  = x[18*i+8];
2588     alpha10 = x[18*i+9];
2589     alpha11 = x[18*i+10];
2590     alpha12 = x[18*i+11];
2591     alpha13 = x[18*i+12];
2592     alpha14 = x[18*i+13];
2593     alpha15 = x[18*i+14];
2594     alpha16 = x[18*i+15];
2595     alpha17 = x[18*i+16];
2596     alpha18 = x[18*i+17];
2597     while (n-->0) {
2598       y[18*(*idx)]    += alpha1*(*v);
2599       y[18*(*idx)+1]  += alpha2*(*v);
2600       y[18*(*idx)+2]  += alpha3*(*v);
2601       y[18*(*idx)+3]  += alpha4*(*v);
2602       y[18*(*idx)+4]  += alpha5*(*v);
2603       y[18*(*idx)+5]  += alpha6*(*v);
2604       y[18*(*idx)+6]  += alpha7*(*v);
2605       y[18*(*idx)+7]  += alpha8*(*v);
2606       y[18*(*idx)+8]  += alpha9*(*v);
2607       y[18*(*idx)+9]  += alpha10*(*v);
2608       y[18*(*idx)+10] += alpha11*(*v);
2609       y[18*(*idx)+11] += alpha12*(*v);
2610       y[18*(*idx)+12] += alpha13*(*v);
2611       y[18*(*idx)+13] += alpha14*(*v);
2612       y[18*(*idx)+14] += alpha15*(*v);
2613       y[18*(*idx)+15] += alpha16*(*v);
2614       y[18*(*idx)+16] += alpha17*(*v);
2615       y[18*(*idx)+17] += alpha18*(*v);
2616       idx++; v++;
2617     }
2618   }
2619   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2620   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2621   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 #undef __FUNCT__
2626 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2627 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2628 {
2629   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2630   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2631   const PetscScalar *x,*v;
2632   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2633   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2634   PetscErrorCode    ierr;
2635   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2636   PetscInt          n,i,jrow,j;
2637 
2638   PetscFunctionBegin;
2639   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2640   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2641   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2642   idx  = a->j;
2643   v    = a->a;
2644   ii   = a->i;
2645 
2646   for (i=0; i<m; i++) {
2647     jrow  = ii[i];
2648     n     = ii[i+1] - jrow;
2649     sum1  = 0.0;
2650     sum2  = 0.0;
2651     sum3  = 0.0;
2652     sum4  = 0.0;
2653     sum5  = 0.0;
2654     sum6  = 0.0;
2655     sum7  = 0.0;
2656     sum8  = 0.0;
2657     sum9  = 0.0;
2658     sum10 = 0.0;
2659     sum11 = 0.0;
2660     sum12 = 0.0;
2661     sum13 = 0.0;
2662     sum14 = 0.0;
2663     sum15 = 0.0;
2664     sum16 = 0.0;
2665     sum17 = 0.0;
2666     sum18 = 0.0;
2667     for (j=0; j<n; j++) {
2668       sum1  += v[jrow]*x[18*idx[jrow]];
2669       sum2  += v[jrow]*x[18*idx[jrow]+1];
2670       sum3  += v[jrow]*x[18*idx[jrow]+2];
2671       sum4  += v[jrow]*x[18*idx[jrow]+3];
2672       sum5  += v[jrow]*x[18*idx[jrow]+4];
2673       sum6  += v[jrow]*x[18*idx[jrow]+5];
2674       sum7  += v[jrow]*x[18*idx[jrow]+6];
2675       sum8  += v[jrow]*x[18*idx[jrow]+7];
2676       sum9  += v[jrow]*x[18*idx[jrow]+8];
2677       sum10 += v[jrow]*x[18*idx[jrow]+9];
2678       sum11 += v[jrow]*x[18*idx[jrow]+10];
2679       sum12 += v[jrow]*x[18*idx[jrow]+11];
2680       sum13 += v[jrow]*x[18*idx[jrow]+12];
2681       sum14 += v[jrow]*x[18*idx[jrow]+13];
2682       sum15 += v[jrow]*x[18*idx[jrow]+14];
2683       sum16 += v[jrow]*x[18*idx[jrow]+15];
2684       sum17 += v[jrow]*x[18*idx[jrow]+16];
2685       sum18 += v[jrow]*x[18*idx[jrow]+17];
2686       jrow++;
2687     }
2688     y[18*i]    += sum1;
2689     y[18*i+1]  += sum2;
2690     y[18*i+2]  += sum3;
2691     y[18*i+3]  += sum4;
2692     y[18*i+4]  += sum5;
2693     y[18*i+5]  += sum6;
2694     y[18*i+6]  += sum7;
2695     y[18*i+7]  += sum8;
2696     y[18*i+8]  += sum9;
2697     y[18*i+9]  += sum10;
2698     y[18*i+10] += sum11;
2699     y[18*i+11] += sum12;
2700     y[18*i+12] += sum13;
2701     y[18*i+13] += sum14;
2702     y[18*i+14] += sum15;
2703     y[18*i+15] += sum16;
2704     y[18*i+16] += sum17;
2705     y[18*i+17] += sum18;
2706   }
2707 
2708   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2709   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2710   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2716 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2717 {
2718   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2719   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2720   const PetscScalar *x,*v;
2721   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2722   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2723   PetscErrorCode    ierr;
2724   const PetscInt    m = b->AIJ->rmap->n,*idx;
2725   PetscInt          n,i;
2726 
2727   PetscFunctionBegin;
2728   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2729   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2730   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2731   for (i=0; i<m; i++) {
2732     idx     = a->j + a->i[i];
2733     v       = a->a + a->i[i];
2734     n       = a->i[i+1] - a->i[i];
2735     alpha1  = x[18*i];
2736     alpha2  = x[18*i+1];
2737     alpha3  = x[18*i+2];
2738     alpha4  = x[18*i+3];
2739     alpha5  = x[18*i+4];
2740     alpha6  = x[18*i+5];
2741     alpha7  = x[18*i+6];
2742     alpha8  = x[18*i+7];
2743     alpha9  = x[18*i+8];
2744     alpha10 = x[18*i+9];
2745     alpha11 = x[18*i+10];
2746     alpha12 = x[18*i+11];
2747     alpha13 = x[18*i+12];
2748     alpha14 = x[18*i+13];
2749     alpha15 = x[18*i+14];
2750     alpha16 = x[18*i+15];
2751     alpha17 = x[18*i+16];
2752     alpha18 = x[18*i+17];
2753     while (n-->0) {
2754       y[18*(*idx)]    += alpha1*(*v);
2755       y[18*(*idx)+1]  += alpha2*(*v);
2756       y[18*(*idx)+2]  += alpha3*(*v);
2757       y[18*(*idx)+3]  += alpha4*(*v);
2758       y[18*(*idx)+4]  += alpha5*(*v);
2759       y[18*(*idx)+5]  += alpha6*(*v);
2760       y[18*(*idx)+6]  += alpha7*(*v);
2761       y[18*(*idx)+7]  += alpha8*(*v);
2762       y[18*(*idx)+8]  += alpha9*(*v);
2763       y[18*(*idx)+9]  += alpha10*(*v);
2764       y[18*(*idx)+10] += alpha11*(*v);
2765       y[18*(*idx)+11] += alpha12*(*v);
2766       y[18*(*idx)+12] += alpha13*(*v);
2767       y[18*(*idx)+13] += alpha14*(*v);
2768       y[18*(*idx)+14] += alpha15*(*v);
2769       y[18*(*idx)+15] += alpha16*(*v);
2770       y[18*(*idx)+16] += alpha17*(*v);
2771       y[18*(*idx)+17] += alpha18*(*v);
2772       idx++; v++;
2773     }
2774   }
2775   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2776   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2777   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatMult_SeqMAIJ_N"
2783 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2784 {
2785   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2786   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2787   const PetscScalar *x,*v;
2788   PetscScalar       *y,*sums;
2789   PetscErrorCode    ierr;
2790   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2791   PetscInt          n,i,jrow,j,dof = b->dof,k;
2792 
2793   PetscFunctionBegin;
2794   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2795   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2796   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2797   idx  = a->j;
2798   v    = a->a;
2799   ii   = a->i;
2800 
2801   for (i=0; i<m; i++) {
2802     jrow = ii[i];
2803     n    = ii[i+1] - jrow;
2804     sums = y + dof*i;
2805     for (j=0; j<n; j++) {
2806       for (k=0; k<dof; k++) {
2807         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2808       }
2809       jrow++;
2810     }
2811   }
2812 
2813   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2814   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2815   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2816   PetscFunctionReturn(0);
2817 }
2818 
2819 #undef __FUNCT__
2820 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
2821 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2822 {
2823   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2824   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2825   const PetscScalar *x,*v;
2826   PetscScalar       *y,*sums;
2827   PetscErrorCode    ierr;
2828   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2829   PetscInt          n,i,jrow,j,dof = b->dof,k;
2830 
2831   PetscFunctionBegin;
2832   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2833   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2834   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2835   idx  = a->j;
2836   v    = a->a;
2837   ii   = a->i;
2838 
2839   for (i=0; i<m; i++) {
2840     jrow = ii[i];
2841     n    = ii[i+1] - jrow;
2842     sums = y + dof*i;
2843     for (j=0; j<n; j++) {
2844       for (k=0; k<dof; k++) {
2845         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2846       }
2847       jrow++;
2848     }
2849   }
2850 
2851   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2852   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2853   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
2859 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2860 {
2861   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2862   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2863   const PetscScalar *x,*v,*alpha;
2864   PetscScalar       *y;
2865   PetscErrorCode    ierr;
2866   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2867   PetscInt          n,i,k;
2868 
2869   PetscFunctionBegin;
2870   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2871   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2872   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2873   for (i=0; i<m; i++) {
2874     idx   = a->j + a->i[i];
2875     v     = a->a + a->i[i];
2876     n     = a->i[i+1] - a->i[i];
2877     alpha = x + dof*i;
2878     while (n-->0) {
2879       for (k=0; k<dof; k++) {
2880         y[dof*(*idx)+k] += alpha[k]*(*v);
2881       }
2882       idx++; v++;
2883     }
2884   }
2885   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2886   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2887   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2888   PetscFunctionReturn(0);
2889 }
2890 
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
2893 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2894 {
2895   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2896   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2897   const PetscScalar *x,*v,*alpha;
2898   PetscScalar       *y;
2899   PetscErrorCode    ierr;
2900   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2901   PetscInt          n,i,k;
2902 
2903   PetscFunctionBegin;
2904   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2905   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2906   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2907   for (i=0; i<m; i++) {
2908     idx   = a->j + a->i[i];
2909     v     = a->a + a->i[i];
2910     n     = a->i[i+1] - a->i[i];
2911     alpha = x + dof*i;
2912     while (n-->0) {
2913       for (k=0; k<dof; k++) {
2914         y[dof*(*idx)+k] += alpha[k]*(*v);
2915       }
2916       idx++; v++;
2917     }
2918   }
2919   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2920   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2921   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2922   PetscFunctionReturn(0);
2923 }
2924 
2925 /*===================================================================================*/
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2928 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2929 {
2930   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2931   PetscErrorCode ierr;
2932 
2933   PetscFunctionBegin;
2934   /* start the scatter */
2935   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2936   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2937   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2938   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 #undef __FUNCT__
2943 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2944 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2945 {
2946   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2947   PetscErrorCode ierr;
2948 
2949   PetscFunctionBegin;
2950   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2951   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2952   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2953   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 #undef __FUNCT__
2958 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2959 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2960 {
2961   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2962   PetscErrorCode ierr;
2963 
2964   PetscFunctionBegin;
2965   /* start the scatter */
2966   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2967   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2968   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2969   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2975 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2976 {
2977   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2978   PetscErrorCode ierr;
2979 
2980   PetscFunctionBegin;
2981   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2982   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2983   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2984   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 /* ----------------------------------------------------------------*/
2989 #undef __FUNCT__
2990 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2991 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2992 {
2993   PetscErrorCode     ierr;
2994   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2995   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2996   Mat                P         =pp->AIJ;
2997   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2998   PetscInt           *pti,*ptj,*ptJ;
2999   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3000   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3001   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
3002   MatScalar          *ca;
3003   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
3004 
3005   PetscFunctionBegin;
3006   /* Get ij structure of P^T */
3007   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3008 
3009   cn = pn*ppdof;
3010   /* Allocate ci array, arrays for fill computation and */
3011   /* free space for accumulating nonzero column info */
3012   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
3013   ci[0] = 0;
3014 
3015   /* Work arrays for rows of P^T*A */
3016   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
3017   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3018   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
3019 
3020   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3021   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3022   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3023   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
3024   current_space = free_space;
3025 
3026   /* Determine symbolic info for each row of C: */
3027   for (i=0; i<pn; i++) {
3028     ptnzi = pti[i+1] - pti[i];
3029     ptJ   = ptj + pti[i];
3030     for (dof=0; dof<ppdof; dof++) {
3031       ptanzi = 0;
3032       /* Determine symbolic row of PtA: */
3033       for (j=0; j<ptnzi; j++) {
3034         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3035         arow = ptJ[j]*ppdof + dof;
3036         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3037         anzj = ai[arow+1] - ai[arow];
3038         ajj  = aj + ai[arow];
3039         for (k=0; k<anzj; k++) {
3040           if (!ptadenserow[ajj[k]]) {
3041             ptadenserow[ajj[k]]    = -1;
3042             ptasparserow[ptanzi++] = ajj[k];
3043           }
3044         }
3045       }
3046       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3047       ptaj = ptasparserow;
3048       cnzi = 0;
3049       for (j=0; j<ptanzi; j++) {
3050         /* Get offset within block of P */
3051         pshift = *ptaj%ppdof;
3052         /* Get block row of P */
3053         prow = (*ptaj++)/ppdof; /* integer division */
3054         /* P has same number of nonzeros per row as the compressed form */
3055         pnzj = pi[prow+1] - pi[prow];
3056         pjj  = pj + pi[prow];
3057         for (k=0;k<pnzj;k++) {
3058           /* Locations in C are shifted by the offset within the block */
3059           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3060           if (!denserow[pjj[k]*ppdof+pshift]) {
3061             denserow[pjj[k]*ppdof+pshift] = -1;
3062             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3063           }
3064         }
3065       }
3066 
3067       /* sort sparserow */
3068       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3069 
3070       /* If free space is not available, make more free space */
3071       /* Double the amount of total space in the list */
3072       if (current_space->local_remaining<cnzi) {
3073         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
3074       }
3075 
3076       /* Copy data into free space, and zero out denserows */
3077       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
3078 
3079       current_space->array           += cnzi;
3080       current_space->local_used      += cnzi;
3081       current_space->local_remaining -= cnzi;
3082 
3083       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3084       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3085 
3086       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3087       /*        For now, we will recompute what is needed. */
3088       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3089     }
3090   }
3091   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3092   /* Allocate space for cj, initialize cj, and */
3093   /* destroy list of free space and other temporary array(s) */
3094   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3095   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3096   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3097 
3098   /* Allocate space for ca */
3099   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
3100 
3101   /* put together the new matrix */
3102   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3103   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
3104 
3105   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3106   /* Since these are PETSc arrays, change flags to free them as necessary. */
3107   c          = (Mat_SeqAIJ*)((*C)->data);
3108   c->free_a  = PETSC_TRUE;
3109   c->free_ij = PETSC_TRUE;
3110   c->nonew   = 0;
3111 
3112   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3113 
3114   /* Clean up. */
3115   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 #undef __FUNCT__
3120 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
3121 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3122 {
3123   /* This routine requires testing -- first draft only */
3124   PetscErrorCode  ierr;
3125   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3126   Mat             P  =pp->AIJ;
3127   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3128   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3129   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3130   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3131   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3132   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3133   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3134   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3135   MatScalar       *ca=c->a,*caj,*apa;
3136 
3137   PetscFunctionBegin;
3138   /* Allocate temporary array for storage of one row of A*P */
3139   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3140 
3141   /* Clear old values in C */
3142   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3143 
3144   for (i=0; i<am; i++) {
3145     /* Form sparse row of A*P */
3146     anzi  = ai[i+1] - ai[i];
3147     apnzj = 0;
3148     for (j=0; j<anzi; j++) {
3149       /* Get offset within block of P */
3150       pshift = *aj%ppdof;
3151       /* Get block row of P */
3152       prow = *aj++/ppdof;   /* integer division */
3153       pnzj = pi[prow+1] - pi[prow];
3154       pjj  = pj + pi[prow];
3155       paj  = pa + pi[prow];
3156       for (k=0; k<pnzj; k++) {
3157         poffset = pjj[k]*ppdof+pshift;
3158         if (!apjdense[poffset]) {
3159           apjdense[poffset] = -1;
3160           apj[apnzj++]      = poffset;
3161         }
3162         apa[poffset] += (*aa)*paj[k];
3163       }
3164       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3165       aa++;
3166     }
3167 
3168     /* Sort the j index array for quick sparse axpy. */
3169     /* Note: a array does not need sorting as it is in dense storage locations. */
3170     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3171 
3172     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3173     prow    = i/ppdof; /* integer division */
3174     pshift  = i%ppdof;
3175     poffset = pi[prow];
3176     pnzi    = pi[prow+1] - poffset;
3177     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3178     pJ = pj+poffset;
3179     pA = pa+poffset;
3180     for (j=0; j<pnzi; j++) {
3181       crow = (*pJ)*ppdof+pshift;
3182       cjj  = cj + ci[crow];
3183       caj  = ca + ci[crow];
3184       pJ++;
3185       /* Perform sparse axpy operation.  Note cjj includes apj. */
3186       for (k=0,nextap=0; nextap<apnzj; k++) {
3187         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3188       }
3189       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3190       pA++;
3191     }
3192 
3193     /* Zero the current row info for A*P */
3194     for (j=0; j<apnzj; j++) {
3195       apa[apj[j]]      = 0.;
3196       apjdense[apj[j]] = 0;
3197     }
3198   }
3199 
3200   /* Assemble the final matrix and clean up */
3201   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3203   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 #undef __FUNCT__
3208 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ"
3209 PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3210 {
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   if (scall == MAT_INITIAL_MATRIX) {
3215     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3216     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3217     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3218   }
3219   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3220   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3221   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3227 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3228 {
3229   PetscErrorCode ierr;
3230 
3231   PetscFunctionBegin;
3232   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3233   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
3234   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 #undef __FUNCT__
3239 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3240 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3241 {
3242   PetscFunctionBegin;
3243   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3244   PetscFunctionReturn(0);
3245 }
3246 
3247 #undef __FUNCT__
3248 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ"
3249 PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3250 {
3251   PetscErrorCode ierr;
3252 
3253   PetscFunctionBegin;
3254   if (scall == MAT_INITIAL_MATRIX) {
3255     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3256     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3257     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3258   }
3259   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3260   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3261   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3267 PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3268 {
3269   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3270   Mat            a    = b->AIJ,B;
3271   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3272   PetscErrorCode ierr;
3273   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3274   PetscInt       *cols;
3275   PetscScalar    *vals;
3276 
3277   PetscFunctionBegin;
3278   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3279   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3280   for (i=0; i<m; i++) {
3281     nmax = PetscMax(nmax,aij->ilen[i]);
3282     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3283   }
3284   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3285   ierr = PetscFree(ilen);CHKERRQ(ierr);
3286   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3287   ii   = 0;
3288   for (i=0; i<m; i++) {
3289     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3290     for (j=0; j<dof; j++) {
3291       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3292       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3293       ii++;
3294     }
3295     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3296   }
3297   ierr = PetscFree(icols);CHKERRQ(ierr);
3298   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3299   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3300 
3301   if (reuse == MAT_REUSE_MATRIX) {
3302     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3303   } else {
3304     *newmat = B;
3305   }
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3310 
3311 #undef __FUNCT__
3312 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3313 PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3314 {
3315   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3316   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3317   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3318   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3319   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3320   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3321   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3322   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3323   PetscInt       rstart,cstart,*garray,ii,k;
3324   PetscErrorCode ierr;
3325   PetscScalar    *vals,*ovals;
3326 
3327   PetscFunctionBegin;
3328   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3329   for (i=0; i<A->rmap->n/dof; i++) {
3330     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3331     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3332     for (j=0; j<dof; j++) {
3333       dnz[dof*i+j] = AIJ->ilen[i];
3334       onz[dof*i+j] = OAIJ->ilen[i];
3335     }
3336   }
3337   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3338   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3339   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3340 
3341   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3342   rstart = dof*maij->A->rmap->rstart;
3343   cstart = dof*maij->A->cmap->rstart;
3344   garray = mpiaij->garray;
3345 
3346   ii = rstart;
3347   for (i=0; i<A->rmap->n/dof; i++) {
3348     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3349     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3350     for (j=0; j<dof; j++) {
3351       for (k=0; k<ncols; k++) {
3352         icols[k] = cstart + dof*cols[k]+j;
3353       }
3354       for (k=0; k<oncols; k++) {
3355         oicols[k] = dof*garray[ocols[k]]+j;
3356       }
3357       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3358       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3359       ii++;
3360     }
3361     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3362     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3363   }
3364   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3365 
3366   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3367   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3368 
3369   if (reuse == MAT_REUSE_MATRIX) {
3370     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3371     ((PetscObject)A)->refct = 1;
3372 
3373     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3374 
3375     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3376   } else {
3377     *newmat = B;
3378   }
3379   PetscFunctionReturn(0);
3380 }
3381 
3382 #undef __FUNCT__
3383 #define __FUNCT__ "MatGetSubMatrix_MAIJ"
3384 PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3385 {
3386   PetscErrorCode ierr;
3387   Mat            A;
3388 
3389   PetscFunctionBegin;
3390   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3391   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3392   ierr = MatDestroy(&A);CHKERRQ(ierr);
3393   PetscFunctionReturn(0);
3394 }
3395 
3396 /* ---------------------------------------------------------------------------------- */
3397 #undef __FUNCT__
3398 #define __FUNCT__ "MatCreateMAIJ"
3399 /*@C
3400   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3401   operations for multicomponent problems.  It interpolates each component the same
3402   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3403   and MATMPIAIJ for distributed matrices.
3404 
3405   Collective
3406 
3407   Input Parameters:
3408 + A - the AIJ matrix describing the action on blocks
3409 - dof - the block size (number of components per node)
3410 
3411   Output Parameter:
3412 . maij - the new MAIJ matrix
3413 
3414   Operations provided:
3415 + MatMult
3416 . MatMultTranspose
3417 . MatMultAdd
3418 . MatMultTransposeAdd
3419 - MatView
3420 
3421   Level: advanced
3422 
3423 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3424 @*/
3425 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3426 {
3427   PetscErrorCode ierr;
3428   PetscMPIInt    size;
3429   PetscInt       n;
3430   Mat            B;
3431 
3432   PetscFunctionBegin;
3433   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3434 
3435   if (dof == 1) *maij = A;
3436   else {
3437     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3438     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3439     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3440     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3441     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3442     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3443 
3444     B->assembled = PETSC_TRUE;
3445 
3446     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3447     if (size == 1) {
3448       Mat_SeqMAIJ *b;
3449 
3450       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3451 
3452       B->ops->setup   = NULL;
3453       B->ops->destroy = MatDestroy_SeqMAIJ;
3454       B->ops->view    = MatView_SeqMAIJ;
3455       b               = (Mat_SeqMAIJ*)B->data;
3456       b->dof          = dof;
3457       b->AIJ          = A;
3458 
3459       if (dof == 2) {
3460         B->ops->mult             = MatMult_SeqMAIJ_2;
3461         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3462         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3463         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3464       } else if (dof == 3) {
3465         B->ops->mult             = MatMult_SeqMAIJ_3;
3466         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3467         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3468         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3469       } else if (dof == 4) {
3470         B->ops->mult             = MatMult_SeqMAIJ_4;
3471         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3472         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3473         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3474       } else if (dof == 5) {
3475         B->ops->mult             = MatMult_SeqMAIJ_5;
3476         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3477         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3478         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3479       } else if (dof == 6) {
3480         B->ops->mult             = MatMult_SeqMAIJ_6;
3481         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3482         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3483         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3484       } else if (dof == 7) {
3485         B->ops->mult             = MatMult_SeqMAIJ_7;
3486         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3487         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3488         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3489       } else if (dof == 8) {
3490         B->ops->mult             = MatMult_SeqMAIJ_8;
3491         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3492         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3493         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3494       } else if (dof == 9) {
3495         B->ops->mult             = MatMult_SeqMAIJ_9;
3496         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3497         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3498         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3499       } else if (dof == 10) {
3500         B->ops->mult             = MatMult_SeqMAIJ_10;
3501         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3502         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3503         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3504       } else if (dof == 11) {
3505         B->ops->mult             = MatMult_SeqMAIJ_11;
3506         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3507         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3508         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3509       } else if (dof == 16) {
3510         B->ops->mult             = MatMult_SeqMAIJ_16;
3511         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3512         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3513         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3514       } else if (dof == 18) {
3515         B->ops->mult             = MatMult_SeqMAIJ_18;
3516         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3517         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3518         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3519       } else {
3520         B->ops->mult             = MatMult_SeqMAIJ_N;
3521         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3522         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3523         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3524       }
3525       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3526       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3527     } else {
3528       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3529       Mat_MPIMAIJ *b;
3530       IS          from,to;
3531       Vec         gvec;
3532 
3533       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3534 
3535       B->ops->setup   = NULL;
3536       B->ops->destroy = MatDestroy_MPIMAIJ;
3537       B->ops->view    = MatView_MPIMAIJ;
3538 
3539       b      = (Mat_MPIMAIJ*)B->data;
3540       b->dof = dof;
3541       b->A   = A;
3542 
3543       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3544       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3545 
3546       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3547       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3548       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3549       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3550       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3551 
3552       /* create two temporary Index sets for build scatter gather */
3553       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3554       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3555 
3556       /* create temporary global vector to generate scatter context */
3557       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3558 
3559       /* generate the scatter context */
3560       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3561 
3562       ierr = ISDestroy(&from);CHKERRQ(ierr);
3563       ierr = ISDestroy(&to);CHKERRQ(ierr);
3564       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3565 
3566       B->ops->mult             = MatMult_MPIMAIJ_dof;
3567       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3568       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3569       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3570 
3571       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3572       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3573     }
3574     B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3575     ierr  = MatSetUp(B);CHKERRQ(ierr);
3576     *maij = B;
3577     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3578   }
3579   PetscFunctionReturn(0);
3580 }
3581