xref: /petsc/src/mat/impls/maij/maij.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 #include <petsc/private/vecimpl.h>
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatMAIJGetAIJ"
25 /*@
26    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27 
28    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29 
30    Input Parameter:
31 .  A - the MAIJ matrix
32 
33    Output Parameter:
34 .  B - the AIJ matrix
35 
36    Level: advanced
37 
38    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39 
40 .seealso: MatCreateMAIJ()
41 @*/
42 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43 {
44   PetscErrorCode ierr;
45   PetscBool      ismpimaij,isseqmaij;
46 
47   PetscFunctionBegin;
48   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50   if (ismpimaij) {
51     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52 
53     *B = b->A;
54   } else if (isseqmaij) {
55     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56 
57     *B = b->AIJ;
58   } else {
59     *B = A;
60   }
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatMAIJRedimension"
66 /*@C
67    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68 
69    Logically Collective
70 
71    Input Parameter:
72 +  A - the MAIJ matrix
73 -  dof - the block size for the new matrix
74 
75    Output Parameter:
76 .  B - the new MAIJ matrix
77 
78    Level: advanced
79 
80 .seealso: MatCreateMAIJ()
81 @*/
82 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83 {
84   PetscErrorCode ierr;
85   Mat            Aij = NULL;
86 
87   PetscFunctionBegin;
88   PetscValidLogicalCollectiveInt(A,dof,2);
89   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatDestroy_SeqMAIJ"
96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97 {
98   PetscErrorCode ierr;
99   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
100 
101   PetscFunctionBegin;
102   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103   ierr = PetscFree(A->data);CHKERRQ(ierr);
104   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
105   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "MatSetUp_MAIJ"
111 PetscErrorCode MatSetUp_MAIJ(Mat A)
112 {
113   PetscFunctionBegin;
114   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatView_SeqMAIJ"
120 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
121 {
122   PetscErrorCode ierr;
123   Mat            B;
124 
125   PetscFunctionBegin;
126   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
127   ierr = MatView(B,viewer);CHKERRQ(ierr);
128   ierr = MatDestroy(&B);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatView_MPIMAIJ"
134 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
135 {
136   PetscErrorCode ierr;
137   Mat            B;
138 
139   PetscFunctionBegin;
140   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
141   ierr = MatView(B,viewer);CHKERRQ(ierr);
142   ierr = MatDestroy(&B);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 #undef __FUNCT__
147 #define __FUNCT__ "MatDestroy_MPIMAIJ"
148 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
149 {
150   PetscErrorCode ierr;
151   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
152 
153   PetscFunctionBegin;
154   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
155   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
156   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
157   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
158   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
159   ierr = PetscFree(A->data);CHKERRQ(ierr);
160   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
161   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
162   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 /*MC
167   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168   multicomponent problems, interpolating or restricting each component the same way independently.
169   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
170 
171   Operations provided:
172 . MatMult
173 . MatMultTranspose
174 . MatMultAdd
175 . MatMultTransposeAdd
176 
177   Level: advanced
178 
179 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180 M*/
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatCreate_MAIJ"
184 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
185 {
186   PetscErrorCode ierr;
187   Mat_MPIMAIJ    *b;
188   PetscMPIInt    size;
189 
190   PetscFunctionBegin;
191   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
192   A->data  = (void*)b;
193 
194   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
195 
196   A->ops->setup = MatSetUp_MAIJ;
197 
198   b->AIJ  = 0;
199   b->dof  = 0;
200   b->OAIJ = 0;
201   b->ctx  = 0;
202   b->w    = 0;
203   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
204   if (size == 1) {
205     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
206   } else {
207     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
208   }
209   A->preallocated  = PETSC_TRUE;
210   A->assembled     = PETSC_TRUE;
211   PetscFunctionReturn(0);
212 }
213 
214 /* --------------------------------------------------------------------------------------*/
215 #undef __FUNCT__
216 #define __FUNCT__ "MatMult_SeqMAIJ_2"
217 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
218 {
219   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
220   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
221   const PetscScalar *x,*v;
222   PetscScalar       *y, sum1, sum2;
223   PetscErrorCode    ierr;
224   PetscInt          nonzerorow=0,n,i,jrow,j;
225   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
226 
227   PetscFunctionBegin;
228   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
229   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
230   idx  = a->j;
231   v    = a->a;
232   ii   = a->i;
233 
234   for (i=0; i<m; i++) {
235     jrow  = ii[i];
236     n     = ii[i+1] - jrow;
237     sum1  = 0.0;
238     sum2  = 0.0;
239 
240     nonzerorow += (n>0);
241     for (j=0; j<n; j++) {
242       sum1 += v[jrow]*x[2*idx[jrow]];
243       sum2 += v[jrow]*x[2*idx[jrow]+1];
244       jrow++;
245     }
246     y[2*i]   = sum1;
247     y[2*i+1] = sum2;
248   }
249 
250   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
251   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
252   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
258 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
259 {
260   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
261   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
262   const PetscScalar *x,*v;
263   PetscScalar       *y,alpha1,alpha2;
264   PetscErrorCode    ierr;
265   PetscInt          n,i;
266   const PetscInt    m = b->AIJ->rmap->n,*idx;
267 
268   PetscFunctionBegin;
269   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
270   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
271   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
272 
273   for (i=0; i<m; i++) {
274     idx    = a->j + a->i[i];
275     v      = a->a + a->i[i];
276     n      = a->i[i+1] - a->i[i];
277     alpha1 = x[2*i];
278     alpha2 = x[2*i+1];
279     while (n-->0) {
280       y[2*(*idx)]   += alpha1*(*v);
281       y[2*(*idx)+1] += alpha2*(*v);
282       idx++; v++;
283     }
284   }
285   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
286   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
287   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
293 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
294 {
295   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
296   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
297   const PetscScalar *x,*v;
298   PetscScalar       *y,sum1, sum2;
299   PetscErrorCode    ierr;
300   PetscInt          n,i,jrow,j;
301   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
302 
303   PetscFunctionBegin;
304   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
305   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
306   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
307   idx  = a->j;
308   v    = a->a;
309   ii   = a->i;
310 
311   for (i=0; i<m; i++) {
312     jrow = ii[i];
313     n    = ii[i+1] - jrow;
314     sum1 = 0.0;
315     sum2 = 0.0;
316     for (j=0; j<n; j++) {
317       sum1 += v[jrow]*x[2*idx[jrow]];
318       sum2 += v[jrow]*x[2*idx[jrow]+1];
319       jrow++;
320     }
321     y[2*i]   += sum1;
322     y[2*i+1] += sum2;
323   }
324 
325   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
326   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
327   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 #undef __FUNCT__
331 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
332 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
333 {
334   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
335   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
336   const PetscScalar *x,*v;
337   PetscScalar       *y,alpha1,alpha2;
338   PetscErrorCode    ierr;
339   PetscInt          n,i;
340   const PetscInt    m = b->AIJ->rmap->n,*idx;
341 
342   PetscFunctionBegin;
343   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
344   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
345   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
346 
347   for (i=0; i<m; i++) {
348     idx    = a->j + a->i[i];
349     v      = a->a + a->i[i];
350     n      = a->i[i+1] - a->i[i];
351     alpha1 = x[2*i];
352     alpha2 = x[2*i+1];
353     while (n-->0) {
354       y[2*(*idx)]   += alpha1*(*v);
355       y[2*(*idx)+1] += alpha2*(*v);
356       idx++; v++;
357     }
358   }
359   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
360   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
361   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 /* --------------------------------------------------------------------------------------*/
365 #undef __FUNCT__
366 #define __FUNCT__ "MatMult_SeqMAIJ_3"
367 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
368 {
369   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
370   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
371   const PetscScalar *x,*v;
372   PetscScalar       *y,sum1, sum2, sum3;
373   PetscErrorCode    ierr;
374   PetscInt          nonzerorow=0,n,i,jrow,j;
375   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
376 
377   PetscFunctionBegin;
378   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
379   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
380   idx  = a->j;
381   v    = a->a;
382   ii   = a->i;
383 
384   for (i=0; i<m; i++) {
385     jrow  = ii[i];
386     n     = ii[i+1] - jrow;
387     sum1  = 0.0;
388     sum2  = 0.0;
389     sum3  = 0.0;
390 
391     nonzerorow += (n>0);
392     for (j=0; j<n; j++) {
393       sum1 += v[jrow]*x[3*idx[jrow]];
394       sum2 += v[jrow]*x[3*idx[jrow]+1];
395       sum3 += v[jrow]*x[3*idx[jrow]+2];
396       jrow++;
397      }
398     y[3*i]   = sum1;
399     y[3*i+1] = sum2;
400     y[3*i+2] = sum3;
401   }
402 
403   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
404   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
411 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
412 {
413   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
414   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
415   const PetscScalar *x,*v;
416   PetscScalar       *y,alpha1,alpha2,alpha3;
417   PetscErrorCode    ierr;
418   PetscInt          n,i;
419   const PetscInt    m = b->AIJ->rmap->n,*idx;
420 
421   PetscFunctionBegin;
422   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
423   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
424   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
425 
426   for (i=0; i<m; i++) {
427     idx    = a->j + a->i[i];
428     v      = a->a + a->i[i];
429     n      = a->i[i+1] - a->i[i];
430     alpha1 = x[3*i];
431     alpha2 = x[3*i+1];
432     alpha3 = x[3*i+2];
433     while (n-->0) {
434       y[3*(*idx)]   += alpha1*(*v);
435       y[3*(*idx)+1] += alpha2*(*v);
436       y[3*(*idx)+2] += alpha3*(*v);
437       idx++; v++;
438     }
439   }
440   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
441   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
442   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
443   PetscFunctionReturn(0);
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
448 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
449 {
450   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
451   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
452   const PetscScalar *x,*v;
453   PetscScalar       *y,sum1, sum2, sum3;
454   PetscErrorCode    ierr;
455   PetscInt          n,i,jrow,j;
456   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
457 
458   PetscFunctionBegin;
459   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
460   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
461   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
462   idx  = a->j;
463   v    = a->a;
464   ii   = a->i;
465 
466   for (i=0; i<m; i++) {
467     jrow = ii[i];
468     n    = ii[i+1] - jrow;
469     sum1 = 0.0;
470     sum2 = 0.0;
471     sum3 = 0.0;
472     for (j=0; j<n; j++) {
473       sum1 += v[jrow]*x[3*idx[jrow]];
474       sum2 += v[jrow]*x[3*idx[jrow]+1];
475       sum3 += v[jrow]*x[3*idx[jrow]+2];
476       jrow++;
477     }
478     y[3*i]   += sum1;
479     y[3*i+1] += sum2;
480     y[3*i+2] += sum3;
481   }
482 
483   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
484   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
485   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 #undef __FUNCT__
489 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
490 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
493   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
494   const PetscScalar *x,*v;
495   PetscScalar       *y,alpha1,alpha2,alpha3;
496   PetscErrorCode    ierr;
497   PetscInt          n,i;
498   const PetscInt    m = b->AIJ->rmap->n,*idx;
499 
500   PetscFunctionBegin;
501   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
502   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
503   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
504   for (i=0; i<m; i++) {
505     idx    = a->j + a->i[i];
506     v      = a->a + a->i[i];
507     n      = a->i[i+1] - a->i[i];
508     alpha1 = x[3*i];
509     alpha2 = x[3*i+1];
510     alpha3 = x[3*i+2];
511     while (n-->0) {
512       y[3*(*idx)]   += alpha1*(*v);
513       y[3*(*idx)+1] += alpha2*(*v);
514       y[3*(*idx)+2] += alpha3*(*v);
515       idx++; v++;
516     }
517   }
518   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
519   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
520   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 /* ------------------------------------------------------------------------------*/
525 #undef __FUNCT__
526 #define __FUNCT__ "MatMult_SeqMAIJ_4"
527 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
528 {
529   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
530   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
531   const PetscScalar *x,*v;
532   PetscScalar       *y,sum1, sum2, sum3, sum4;
533   PetscErrorCode    ierr;
534   PetscInt          nonzerorow=0,n,i,jrow,j;
535   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
536 
537   PetscFunctionBegin;
538   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
539   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
540   idx  = a->j;
541   v    = a->a;
542   ii   = a->i;
543 
544   for (i=0; i<m; i++) {
545     jrow        = ii[i];
546     n           = ii[i+1] - jrow;
547     sum1        = 0.0;
548     sum2        = 0.0;
549     sum3        = 0.0;
550     sum4        = 0.0;
551     nonzerorow += (n>0);
552     for (j=0; j<n; j++) {
553       sum1 += v[jrow]*x[4*idx[jrow]];
554       sum2 += v[jrow]*x[4*idx[jrow]+1];
555       sum3 += v[jrow]*x[4*idx[jrow]+2];
556       sum4 += v[jrow]*x[4*idx[jrow]+3];
557       jrow++;
558     }
559     y[4*i]   = sum1;
560     y[4*i+1] = sum2;
561     y[4*i+2] = sum3;
562     y[4*i+3] = sum4;
563   }
564 
565   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
566   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
567   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
573 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
574 {
575   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
576   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
577   const PetscScalar *x,*v;
578   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
579   PetscErrorCode    ierr;
580   PetscInt          n,i;
581   const PetscInt    m = b->AIJ->rmap->n,*idx;
582 
583   PetscFunctionBegin;
584   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
585   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
586   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
587   for (i=0; i<m; i++) {
588     idx    = a->j + a->i[i];
589     v      = a->a + a->i[i];
590     n      = a->i[i+1] - a->i[i];
591     alpha1 = x[4*i];
592     alpha2 = x[4*i+1];
593     alpha3 = x[4*i+2];
594     alpha4 = x[4*i+3];
595     while (n-->0) {
596       y[4*(*idx)]   += alpha1*(*v);
597       y[4*(*idx)+1] += alpha2*(*v);
598       y[4*(*idx)+2] += alpha3*(*v);
599       y[4*(*idx)+3] += alpha4*(*v);
600       idx++; v++;
601     }
602   }
603   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
604   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
605   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNCT__
610 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
611 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
612 {
613   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
614   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
615   const PetscScalar *x,*v;
616   PetscScalar       *y,sum1, sum2, sum3, sum4;
617   PetscErrorCode    ierr;
618   PetscInt          n,i,jrow,j;
619   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
620 
621   PetscFunctionBegin;
622   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
623   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
624   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
625   idx  = a->j;
626   v    = a->a;
627   ii   = a->i;
628 
629   for (i=0; i<m; i++) {
630     jrow = ii[i];
631     n    = ii[i+1] - jrow;
632     sum1 = 0.0;
633     sum2 = 0.0;
634     sum3 = 0.0;
635     sum4 = 0.0;
636     for (j=0; j<n; j++) {
637       sum1 += v[jrow]*x[4*idx[jrow]];
638       sum2 += v[jrow]*x[4*idx[jrow]+1];
639       sum3 += v[jrow]*x[4*idx[jrow]+2];
640       sum4 += v[jrow]*x[4*idx[jrow]+3];
641       jrow++;
642     }
643     y[4*i]   += sum1;
644     y[4*i+1] += sum2;
645     y[4*i+2] += sum3;
646     y[4*i+3] += sum4;
647   }
648 
649   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
650   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
651   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 #undef __FUNCT__
655 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
656 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
657 {
658   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
659   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
660   const PetscScalar *x,*v;
661   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
662   PetscErrorCode    ierr;
663   PetscInt          n,i;
664   const PetscInt    m = b->AIJ->rmap->n,*idx;
665 
666   PetscFunctionBegin;
667   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
668   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
669   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
670 
671   for (i=0; i<m; i++) {
672     idx    = a->j + a->i[i];
673     v      = a->a + a->i[i];
674     n      = a->i[i+1] - a->i[i];
675     alpha1 = x[4*i];
676     alpha2 = x[4*i+1];
677     alpha3 = x[4*i+2];
678     alpha4 = x[4*i+3];
679     while (n-->0) {
680       y[4*(*idx)]   += alpha1*(*v);
681       y[4*(*idx)+1] += alpha2*(*v);
682       y[4*(*idx)+2] += alpha3*(*v);
683       y[4*(*idx)+3] += alpha4*(*v);
684       idx++; v++;
685     }
686   }
687   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
688   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
689   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
690   PetscFunctionReturn(0);
691 }
692 /* ------------------------------------------------------------------------------*/
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatMult_SeqMAIJ_5"
696 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
697 {
698   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
699   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
700   const PetscScalar *x,*v;
701   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
702   PetscErrorCode    ierr;
703   PetscInt          nonzerorow=0,n,i,jrow,j;
704   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
705 
706   PetscFunctionBegin;
707   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
708   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
709   idx  = a->j;
710   v    = a->a;
711   ii   = a->i;
712 
713   for (i=0; i<m; i++) {
714     jrow  = ii[i];
715     n     = ii[i+1] - jrow;
716     sum1  = 0.0;
717     sum2  = 0.0;
718     sum3  = 0.0;
719     sum4  = 0.0;
720     sum5  = 0.0;
721 
722     nonzerorow += (n>0);
723     for (j=0; j<n; j++) {
724       sum1 += v[jrow]*x[5*idx[jrow]];
725       sum2 += v[jrow]*x[5*idx[jrow]+1];
726       sum3 += v[jrow]*x[5*idx[jrow]+2];
727       sum4 += v[jrow]*x[5*idx[jrow]+3];
728       sum5 += v[jrow]*x[5*idx[jrow]+4];
729       jrow++;
730     }
731     y[5*i]   = sum1;
732     y[5*i+1] = sum2;
733     y[5*i+2] = sum3;
734     y[5*i+3] = sum4;
735     y[5*i+4] = sum5;
736   }
737 
738   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
739   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
740   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
746 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
747 {
748   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
749   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
750   const PetscScalar *x,*v;
751   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
752   PetscErrorCode    ierr;
753   PetscInt          n,i;
754   const PetscInt    m = b->AIJ->rmap->n,*idx;
755 
756   PetscFunctionBegin;
757   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
758   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
759   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
760 
761   for (i=0; i<m; i++) {
762     idx    = a->j + a->i[i];
763     v      = a->a + a->i[i];
764     n      = a->i[i+1] - a->i[i];
765     alpha1 = x[5*i];
766     alpha2 = x[5*i+1];
767     alpha3 = x[5*i+2];
768     alpha4 = x[5*i+3];
769     alpha5 = x[5*i+4];
770     while (n-->0) {
771       y[5*(*idx)]   += alpha1*(*v);
772       y[5*(*idx)+1] += alpha2*(*v);
773       y[5*(*idx)+2] += alpha3*(*v);
774       y[5*(*idx)+3] += alpha4*(*v);
775       y[5*(*idx)+4] += alpha5*(*v);
776       idx++; v++;
777     }
778   }
779   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
780   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
781   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
787 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788 {
789   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
790   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
791   const PetscScalar *x,*v;
792   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
793   PetscErrorCode    ierr;
794   PetscInt          n,i,jrow,j;
795   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
796 
797   PetscFunctionBegin;
798   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
799   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
800   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
801   idx  = a->j;
802   v    = a->a;
803   ii   = a->i;
804 
805   for (i=0; i<m; i++) {
806     jrow = ii[i];
807     n    = ii[i+1] - jrow;
808     sum1 = 0.0;
809     sum2 = 0.0;
810     sum3 = 0.0;
811     sum4 = 0.0;
812     sum5 = 0.0;
813     for (j=0; j<n; j++) {
814       sum1 += v[jrow]*x[5*idx[jrow]];
815       sum2 += v[jrow]*x[5*idx[jrow]+1];
816       sum3 += v[jrow]*x[5*idx[jrow]+2];
817       sum4 += v[jrow]*x[5*idx[jrow]+3];
818       sum5 += v[jrow]*x[5*idx[jrow]+4];
819       jrow++;
820     }
821     y[5*i]   += sum1;
822     y[5*i+1] += sum2;
823     y[5*i+2] += sum3;
824     y[5*i+3] += sum4;
825     y[5*i+4] += sum5;
826   }
827 
828   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
829   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
830   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
836 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
839   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
840   const PetscScalar *x,*v;
841   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
842   PetscErrorCode    ierr;
843   PetscInt          n,i;
844   const PetscInt    m = b->AIJ->rmap->n,*idx;
845 
846   PetscFunctionBegin;
847   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
848   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
849   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
850 
851   for (i=0; i<m; i++) {
852     idx    = a->j + a->i[i];
853     v      = a->a + a->i[i];
854     n      = a->i[i+1] - a->i[i];
855     alpha1 = x[5*i];
856     alpha2 = x[5*i+1];
857     alpha3 = x[5*i+2];
858     alpha4 = x[5*i+3];
859     alpha5 = x[5*i+4];
860     while (n-->0) {
861       y[5*(*idx)]   += alpha1*(*v);
862       y[5*(*idx)+1] += alpha2*(*v);
863       y[5*(*idx)+2] += alpha3*(*v);
864       y[5*(*idx)+3] += alpha4*(*v);
865       y[5*(*idx)+4] += alpha5*(*v);
866       idx++; v++;
867     }
868   }
869   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
870   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
871   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 /* ------------------------------------------------------------------------------*/
876 #undef __FUNCT__
877 #define __FUNCT__ "MatMult_SeqMAIJ_6"
878 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879 {
880   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
881   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
882   const PetscScalar *x,*v;
883   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
884   PetscErrorCode    ierr;
885   PetscInt          nonzerorow=0,n,i,jrow,j;
886   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
887 
888   PetscFunctionBegin;
889   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
890   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
891   idx  = a->j;
892   v    = a->a;
893   ii   = a->i;
894 
895   for (i=0; i<m; i++) {
896     jrow  = ii[i];
897     n     = ii[i+1] - jrow;
898     sum1  = 0.0;
899     sum2  = 0.0;
900     sum3  = 0.0;
901     sum4  = 0.0;
902     sum5  = 0.0;
903     sum6  = 0.0;
904 
905     nonzerorow += (n>0);
906     for (j=0; j<n; j++) {
907       sum1 += v[jrow]*x[6*idx[jrow]];
908       sum2 += v[jrow]*x[6*idx[jrow]+1];
909       sum3 += v[jrow]*x[6*idx[jrow]+2];
910       sum4 += v[jrow]*x[6*idx[jrow]+3];
911       sum5 += v[jrow]*x[6*idx[jrow]+4];
912       sum6 += v[jrow]*x[6*idx[jrow]+5];
913       jrow++;
914     }
915     y[6*i]   = sum1;
916     y[6*i+1] = sum2;
917     y[6*i+2] = sum3;
918     y[6*i+3] = sum4;
919     y[6*i+4] = sum5;
920     y[6*i+5] = sum6;
921   }
922 
923   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
924   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
925   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
931 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
932 {
933   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
934   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
935   const PetscScalar *x,*v;
936   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
937   PetscErrorCode    ierr;
938   PetscInt          n,i;
939   const PetscInt    m = b->AIJ->rmap->n,*idx;
940 
941   PetscFunctionBegin;
942   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
943   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
944   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
945 
946   for (i=0; i<m; i++) {
947     idx    = a->j + a->i[i];
948     v      = a->a + a->i[i];
949     n      = a->i[i+1] - a->i[i];
950     alpha1 = x[6*i];
951     alpha2 = x[6*i+1];
952     alpha3 = x[6*i+2];
953     alpha4 = x[6*i+3];
954     alpha5 = x[6*i+4];
955     alpha6 = x[6*i+5];
956     while (n-->0) {
957       y[6*(*idx)]   += alpha1*(*v);
958       y[6*(*idx)+1] += alpha2*(*v);
959       y[6*(*idx)+2] += alpha3*(*v);
960       y[6*(*idx)+3] += alpha4*(*v);
961       y[6*(*idx)+4] += alpha5*(*v);
962       y[6*(*idx)+5] += alpha6*(*v);
963       idx++; v++;
964     }
965   }
966   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
967   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
968   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
974 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
975 {
976   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
977   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
978   const PetscScalar *x,*v;
979   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
980   PetscErrorCode    ierr;
981   PetscInt          n,i,jrow,j;
982   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
983 
984   PetscFunctionBegin;
985   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
986   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
987   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
988   idx  = a->j;
989   v    = a->a;
990   ii   = a->i;
991 
992   for (i=0; i<m; i++) {
993     jrow = ii[i];
994     n    = ii[i+1] - jrow;
995     sum1 = 0.0;
996     sum2 = 0.0;
997     sum3 = 0.0;
998     sum4 = 0.0;
999     sum5 = 0.0;
1000     sum6 = 0.0;
1001     for (j=0; j<n; j++) {
1002       sum1 += v[jrow]*x[6*idx[jrow]];
1003       sum2 += v[jrow]*x[6*idx[jrow]+1];
1004       sum3 += v[jrow]*x[6*idx[jrow]+2];
1005       sum4 += v[jrow]*x[6*idx[jrow]+3];
1006       sum5 += v[jrow]*x[6*idx[jrow]+4];
1007       sum6 += v[jrow]*x[6*idx[jrow]+5];
1008       jrow++;
1009     }
1010     y[6*i]   += sum1;
1011     y[6*i+1] += sum2;
1012     y[6*i+2] += sum3;
1013     y[6*i+3] += sum4;
1014     y[6*i+4] += sum5;
1015     y[6*i+5] += sum6;
1016   }
1017 
1018   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1019   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1020   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1027 {
1028   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1029   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1030   const PetscScalar *x,*v;
1031   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1032   PetscErrorCode    ierr;
1033   PetscInt          n,i;
1034   const PetscInt    m = b->AIJ->rmap->n,*idx;
1035 
1036   PetscFunctionBegin;
1037   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1038   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1039   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1040 
1041   for (i=0; i<m; i++) {
1042     idx    = a->j + a->i[i];
1043     v      = a->a + a->i[i];
1044     n      = a->i[i+1] - a->i[i];
1045     alpha1 = x[6*i];
1046     alpha2 = x[6*i+1];
1047     alpha3 = x[6*i+2];
1048     alpha4 = x[6*i+3];
1049     alpha5 = x[6*i+4];
1050     alpha6 = x[6*i+5];
1051     while (n-->0) {
1052       y[6*(*idx)]   += alpha1*(*v);
1053       y[6*(*idx)+1] += alpha2*(*v);
1054       y[6*(*idx)+2] += alpha3*(*v);
1055       y[6*(*idx)+3] += alpha4*(*v);
1056       y[6*(*idx)+4] += alpha5*(*v);
1057       y[6*(*idx)+5] += alpha6*(*v);
1058       idx++; v++;
1059     }
1060   }
1061   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1062   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1063   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /* ------------------------------------------------------------------------------*/
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1070 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1071 {
1072   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1073   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1074   const PetscScalar *x,*v;
1075   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1076   PetscErrorCode    ierr;
1077   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1078   PetscInt          nonzerorow=0,n,i,jrow,j;
1079 
1080   PetscFunctionBegin;
1081   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1082   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1083   idx  = a->j;
1084   v    = a->a;
1085   ii   = a->i;
1086 
1087   for (i=0; i<m; i++) {
1088     jrow  = ii[i];
1089     n     = ii[i+1] - jrow;
1090     sum1  = 0.0;
1091     sum2  = 0.0;
1092     sum3  = 0.0;
1093     sum4  = 0.0;
1094     sum5  = 0.0;
1095     sum6  = 0.0;
1096     sum7  = 0.0;
1097 
1098     nonzerorow += (n>0);
1099     for (j=0; j<n; j++) {
1100       sum1 += v[jrow]*x[7*idx[jrow]];
1101       sum2 += v[jrow]*x[7*idx[jrow]+1];
1102       sum3 += v[jrow]*x[7*idx[jrow]+2];
1103       sum4 += v[jrow]*x[7*idx[jrow]+3];
1104       sum5 += v[jrow]*x[7*idx[jrow]+4];
1105       sum6 += v[jrow]*x[7*idx[jrow]+5];
1106       sum7 += v[jrow]*x[7*idx[jrow]+6];
1107       jrow++;
1108     }
1109     y[7*i]   = sum1;
1110     y[7*i+1] = sum2;
1111     y[7*i+2] = sum3;
1112     y[7*i+3] = sum4;
1113     y[7*i+4] = sum5;
1114     y[7*i+5] = sum6;
1115     y[7*i+6] = sum7;
1116   }
1117 
1118   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1119   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1120   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNCT__
1125 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1126 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1127 {
1128   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1129   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1130   const PetscScalar *x,*v;
1131   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1132   PetscErrorCode    ierr;
1133   const PetscInt    m = b->AIJ->rmap->n,*idx;
1134   PetscInt          n,i;
1135 
1136   PetscFunctionBegin;
1137   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1138   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1139   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1140 
1141   for (i=0; i<m; i++) {
1142     idx    = a->j + a->i[i];
1143     v      = a->a + a->i[i];
1144     n      = a->i[i+1] - a->i[i];
1145     alpha1 = x[7*i];
1146     alpha2 = x[7*i+1];
1147     alpha3 = x[7*i+2];
1148     alpha4 = x[7*i+3];
1149     alpha5 = x[7*i+4];
1150     alpha6 = x[7*i+5];
1151     alpha7 = x[7*i+6];
1152     while (n-->0) {
1153       y[7*(*idx)]   += alpha1*(*v);
1154       y[7*(*idx)+1] += alpha2*(*v);
1155       y[7*(*idx)+2] += alpha3*(*v);
1156       y[7*(*idx)+3] += alpha4*(*v);
1157       y[7*(*idx)+4] += alpha5*(*v);
1158       y[7*(*idx)+5] += alpha6*(*v);
1159       y[7*(*idx)+6] += alpha7*(*v);
1160       idx++; v++;
1161     }
1162   }
1163   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1164   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1165   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1171 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1172 {
1173   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1174   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1175   const PetscScalar *x,*v;
1176   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1177   PetscErrorCode    ierr;
1178   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1179   PetscInt          n,i,jrow,j;
1180 
1181   PetscFunctionBegin;
1182   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1183   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1184   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1185   idx  = a->j;
1186   v    = a->a;
1187   ii   = a->i;
1188 
1189   for (i=0; i<m; i++) {
1190     jrow = ii[i];
1191     n    = ii[i+1] - jrow;
1192     sum1 = 0.0;
1193     sum2 = 0.0;
1194     sum3 = 0.0;
1195     sum4 = 0.0;
1196     sum5 = 0.0;
1197     sum6 = 0.0;
1198     sum7 = 0.0;
1199     for (j=0; j<n; j++) {
1200       sum1 += v[jrow]*x[7*idx[jrow]];
1201       sum2 += v[jrow]*x[7*idx[jrow]+1];
1202       sum3 += v[jrow]*x[7*idx[jrow]+2];
1203       sum4 += v[jrow]*x[7*idx[jrow]+3];
1204       sum5 += v[jrow]*x[7*idx[jrow]+4];
1205       sum6 += v[jrow]*x[7*idx[jrow]+5];
1206       sum7 += v[jrow]*x[7*idx[jrow]+6];
1207       jrow++;
1208     }
1209     y[7*i]   += sum1;
1210     y[7*i+1] += sum2;
1211     y[7*i+2] += sum3;
1212     y[7*i+3] += sum4;
1213     y[7*i+4] += sum5;
1214     y[7*i+5] += sum6;
1215     y[7*i+6] += sum7;
1216   }
1217 
1218   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1219   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1220   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1226 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1227 {
1228   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1229   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1230   const PetscScalar *x,*v;
1231   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1232   PetscErrorCode    ierr;
1233   const PetscInt    m = b->AIJ->rmap->n,*idx;
1234   PetscInt          n,i;
1235 
1236   PetscFunctionBegin;
1237   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1238   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1239   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1240   for (i=0; i<m; i++) {
1241     idx    = a->j + a->i[i];
1242     v      = a->a + a->i[i];
1243     n      = a->i[i+1] - a->i[i];
1244     alpha1 = x[7*i];
1245     alpha2 = x[7*i+1];
1246     alpha3 = x[7*i+2];
1247     alpha4 = x[7*i+3];
1248     alpha5 = x[7*i+4];
1249     alpha6 = x[7*i+5];
1250     alpha7 = x[7*i+6];
1251     while (n-->0) {
1252       y[7*(*idx)]   += alpha1*(*v);
1253       y[7*(*idx)+1] += alpha2*(*v);
1254       y[7*(*idx)+2] += alpha3*(*v);
1255       y[7*(*idx)+3] += alpha4*(*v);
1256       y[7*(*idx)+4] += alpha5*(*v);
1257       y[7*(*idx)+5] += alpha6*(*v);
1258       y[7*(*idx)+6] += alpha7*(*v);
1259       idx++; v++;
1260     }
1261   }
1262   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1263   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1264   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1270 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1271 {
1272   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1273   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1274   const PetscScalar *x,*v;
1275   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1276   PetscErrorCode    ierr;
1277   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1278   PetscInt          nonzerorow=0,n,i,jrow,j;
1279 
1280   PetscFunctionBegin;
1281   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1282   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1283   idx  = a->j;
1284   v    = a->a;
1285   ii   = a->i;
1286 
1287   for (i=0; i<m; i++) {
1288     jrow  = ii[i];
1289     n     = ii[i+1] - jrow;
1290     sum1  = 0.0;
1291     sum2  = 0.0;
1292     sum3  = 0.0;
1293     sum4  = 0.0;
1294     sum5  = 0.0;
1295     sum6  = 0.0;
1296     sum7  = 0.0;
1297     sum8  = 0.0;
1298 
1299     nonzerorow += (n>0);
1300     for (j=0; j<n; j++) {
1301       sum1 += v[jrow]*x[8*idx[jrow]];
1302       sum2 += v[jrow]*x[8*idx[jrow]+1];
1303       sum3 += v[jrow]*x[8*idx[jrow]+2];
1304       sum4 += v[jrow]*x[8*idx[jrow]+3];
1305       sum5 += v[jrow]*x[8*idx[jrow]+4];
1306       sum6 += v[jrow]*x[8*idx[jrow]+5];
1307       sum7 += v[jrow]*x[8*idx[jrow]+6];
1308       sum8 += v[jrow]*x[8*idx[jrow]+7];
1309       jrow++;
1310     }
1311     y[8*i]   = sum1;
1312     y[8*i+1] = sum2;
1313     y[8*i+2] = sum3;
1314     y[8*i+3] = sum4;
1315     y[8*i+4] = sum5;
1316     y[8*i+5] = sum6;
1317     y[8*i+6] = sum7;
1318     y[8*i+7] = sum8;
1319   }
1320 
1321   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1322   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1323   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1329 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1330 {
1331   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1332   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1333   const PetscScalar *x,*v;
1334   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1335   PetscErrorCode    ierr;
1336   const PetscInt    m = b->AIJ->rmap->n,*idx;
1337   PetscInt          n,i;
1338 
1339   PetscFunctionBegin;
1340   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1341   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1342   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1343 
1344   for (i=0; i<m; i++) {
1345     idx    = a->j + a->i[i];
1346     v      = a->a + a->i[i];
1347     n      = a->i[i+1] - a->i[i];
1348     alpha1 = x[8*i];
1349     alpha2 = x[8*i+1];
1350     alpha3 = x[8*i+2];
1351     alpha4 = x[8*i+3];
1352     alpha5 = x[8*i+4];
1353     alpha6 = x[8*i+5];
1354     alpha7 = x[8*i+6];
1355     alpha8 = x[8*i+7];
1356     while (n-->0) {
1357       y[8*(*idx)]   += alpha1*(*v);
1358       y[8*(*idx)+1] += alpha2*(*v);
1359       y[8*(*idx)+2] += alpha3*(*v);
1360       y[8*(*idx)+3] += alpha4*(*v);
1361       y[8*(*idx)+4] += alpha5*(*v);
1362       y[8*(*idx)+5] += alpha6*(*v);
1363       y[8*(*idx)+6] += alpha7*(*v);
1364       y[8*(*idx)+7] += alpha8*(*v);
1365       idx++; v++;
1366     }
1367   }
1368   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1369   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1370   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1376 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1377 {
1378   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1379   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1380   const PetscScalar *x,*v;
1381   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1382   PetscErrorCode    ierr;
1383   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1384   PetscInt          n,i,jrow,j;
1385 
1386   PetscFunctionBegin;
1387   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1388   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1389   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1390   idx  = a->j;
1391   v    = a->a;
1392   ii   = a->i;
1393 
1394   for (i=0; i<m; i++) {
1395     jrow = ii[i];
1396     n    = ii[i+1] - jrow;
1397     sum1 = 0.0;
1398     sum2 = 0.0;
1399     sum3 = 0.0;
1400     sum4 = 0.0;
1401     sum5 = 0.0;
1402     sum6 = 0.0;
1403     sum7 = 0.0;
1404     sum8 = 0.0;
1405     for (j=0; j<n; j++) {
1406       sum1 += v[jrow]*x[8*idx[jrow]];
1407       sum2 += v[jrow]*x[8*idx[jrow]+1];
1408       sum3 += v[jrow]*x[8*idx[jrow]+2];
1409       sum4 += v[jrow]*x[8*idx[jrow]+3];
1410       sum5 += v[jrow]*x[8*idx[jrow]+4];
1411       sum6 += v[jrow]*x[8*idx[jrow]+5];
1412       sum7 += v[jrow]*x[8*idx[jrow]+6];
1413       sum8 += v[jrow]*x[8*idx[jrow]+7];
1414       jrow++;
1415     }
1416     y[8*i]   += sum1;
1417     y[8*i+1] += sum2;
1418     y[8*i+2] += sum3;
1419     y[8*i+3] += sum4;
1420     y[8*i+4] += sum5;
1421     y[8*i+5] += sum6;
1422     y[8*i+6] += sum7;
1423     y[8*i+7] += sum8;
1424   }
1425 
1426   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1427   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1428   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1434 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1435 {
1436   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1437   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1438   const PetscScalar *x,*v;
1439   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1440   PetscErrorCode    ierr;
1441   const PetscInt    m = b->AIJ->rmap->n,*idx;
1442   PetscInt          n,i;
1443 
1444   PetscFunctionBegin;
1445   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1446   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1447   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1448   for (i=0; i<m; i++) {
1449     idx    = a->j + a->i[i];
1450     v      = a->a + a->i[i];
1451     n      = a->i[i+1] - a->i[i];
1452     alpha1 = x[8*i];
1453     alpha2 = x[8*i+1];
1454     alpha3 = x[8*i+2];
1455     alpha4 = x[8*i+3];
1456     alpha5 = x[8*i+4];
1457     alpha6 = x[8*i+5];
1458     alpha7 = x[8*i+6];
1459     alpha8 = x[8*i+7];
1460     while (n-->0) {
1461       y[8*(*idx)]   += alpha1*(*v);
1462       y[8*(*idx)+1] += alpha2*(*v);
1463       y[8*(*idx)+2] += alpha3*(*v);
1464       y[8*(*idx)+3] += alpha4*(*v);
1465       y[8*(*idx)+4] += alpha5*(*v);
1466       y[8*(*idx)+5] += alpha6*(*v);
1467       y[8*(*idx)+6] += alpha7*(*v);
1468       y[8*(*idx)+7] += alpha8*(*v);
1469       idx++; v++;
1470     }
1471   }
1472   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1473   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1474   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 /* ------------------------------------------------------------------------------*/
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1481 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1482 {
1483   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1484   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1485   const PetscScalar *x,*v;
1486   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1487   PetscErrorCode    ierr;
1488   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1489   PetscInt          nonzerorow=0,n,i,jrow,j;
1490 
1491   PetscFunctionBegin;
1492   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1493   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1494   idx  = a->j;
1495   v    = a->a;
1496   ii   = a->i;
1497 
1498   for (i=0; i<m; i++) {
1499     jrow  = ii[i];
1500     n     = ii[i+1] - jrow;
1501     sum1  = 0.0;
1502     sum2  = 0.0;
1503     sum3  = 0.0;
1504     sum4  = 0.0;
1505     sum5  = 0.0;
1506     sum6  = 0.0;
1507     sum7  = 0.0;
1508     sum8  = 0.0;
1509     sum9  = 0.0;
1510 
1511     nonzerorow += (n>0);
1512     for (j=0; j<n; j++) {
1513       sum1 += v[jrow]*x[9*idx[jrow]];
1514       sum2 += v[jrow]*x[9*idx[jrow]+1];
1515       sum3 += v[jrow]*x[9*idx[jrow]+2];
1516       sum4 += v[jrow]*x[9*idx[jrow]+3];
1517       sum5 += v[jrow]*x[9*idx[jrow]+4];
1518       sum6 += v[jrow]*x[9*idx[jrow]+5];
1519       sum7 += v[jrow]*x[9*idx[jrow]+6];
1520       sum8 += v[jrow]*x[9*idx[jrow]+7];
1521       sum9 += v[jrow]*x[9*idx[jrow]+8];
1522       jrow++;
1523     }
1524     y[9*i]   = sum1;
1525     y[9*i+1] = sum2;
1526     y[9*i+2] = sum3;
1527     y[9*i+3] = sum4;
1528     y[9*i+4] = sum5;
1529     y[9*i+5] = sum6;
1530     y[9*i+6] = sum7;
1531     y[9*i+7] = sum8;
1532     y[9*i+8] = sum9;
1533   }
1534 
1535   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1536   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 /* ------------------------------------------------------------------------------*/
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1545 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1546 {
1547   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1548   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1549   const PetscScalar *x,*v;
1550   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1551   PetscErrorCode    ierr;
1552   const PetscInt    m = b->AIJ->rmap->n,*idx;
1553   PetscInt          n,i;
1554 
1555   PetscFunctionBegin;
1556   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1557   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1558   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1559 
1560   for (i=0; i<m; i++) {
1561     idx    = a->j + a->i[i];
1562     v      = a->a + a->i[i];
1563     n      = a->i[i+1] - a->i[i];
1564     alpha1 = x[9*i];
1565     alpha2 = x[9*i+1];
1566     alpha3 = x[9*i+2];
1567     alpha4 = x[9*i+3];
1568     alpha5 = x[9*i+4];
1569     alpha6 = x[9*i+5];
1570     alpha7 = x[9*i+6];
1571     alpha8 = x[9*i+7];
1572     alpha9 = x[9*i+8];
1573     while (n-->0) {
1574       y[9*(*idx)]   += alpha1*(*v);
1575       y[9*(*idx)+1] += alpha2*(*v);
1576       y[9*(*idx)+2] += alpha3*(*v);
1577       y[9*(*idx)+3] += alpha4*(*v);
1578       y[9*(*idx)+4] += alpha5*(*v);
1579       y[9*(*idx)+5] += alpha6*(*v);
1580       y[9*(*idx)+6] += alpha7*(*v);
1581       y[9*(*idx)+7] += alpha8*(*v);
1582       y[9*(*idx)+8] += alpha9*(*v);
1583       idx++; v++;
1584     }
1585   }
1586   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1587   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1588   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1589   PetscFunctionReturn(0);
1590 }
1591 
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1594 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1595 {
1596   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1597   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1598   const PetscScalar *x,*v;
1599   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1600   PetscErrorCode    ierr;
1601   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1602   PetscInt          n,i,jrow,j;
1603 
1604   PetscFunctionBegin;
1605   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1606   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1607   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1608   idx  = a->j;
1609   v    = a->a;
1610   ii   = a->i;
1611 
1612   for (i=0; i<m; i++) {
1613     jrow = ii[i];
1614     n    = ii[i+1] - jrow;
1615     sum1 = 0.0;
1616     sum2 = 0.0;
1617     sum3 = 0.0;
1618     sum4 = 0.0;
1619     sum5 = 0.0;
1620     sum6 = 0.0;
1621     sum7 = 0.0;
1622     sum8 = 0.0;
1623     sum9 = 0.0;
1624     for (j=0; j<n; j++) {
1625       sum1 += v[jrow]*x[9*idx[jrow]];
1626       sum2 += v[jrow]*x[9*idx[jrow]+1];
1627       sum3 += v[jrow]*x[9*idx[jrow]+2];
1628       sum4 += v[jrow]*x[9*idx[jrow]+3];
1629       sum5 += v[jrow]*x[9*idx[jrow]+4];
1630       sum6 += v[jrow]*x[9*idx[jrow]+5];
1631       sum7 += v[jrow]*x[9*idx[jrow]+6];
1632       sum8 += v[jrow]*x[9*idx[jrow]+7];
1633       sum9 += v[jrow]*x[9*idx[jrow]+8];
1634       jrow++;
1635     }
1636     y[9*i]   += sum1;
1637     y[9*i+1] += sum2;
1638     y[9*i+2] += sum3;
1639     y[9*i+3] += sum4;
1640     y[9*i+4] += sum5;
1641     y[9*i+5] += sum6;
1642     y[9*i+6] += sum7;
1643     y[9*i+7] += sum8;
1644     y[9*i+8] += sum9;
1645   }
1646 
1647   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1648   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1649   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1655 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1656 {
1657   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1658   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1659   const PetscScalar *x,*v;
1660   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1661   PetscErrorCode    ierr;
1662   const PetscInt    m = b->AIJ->rmap->n,*idx;
1663   PetscInt          n,i;
1664 
1665   PetscFunctionBegin;
1666   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1667   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1668   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1669   for (i=0; i<m; i++) {
1670     idx    = a->j + a->i[i];
1671     v      = a->a + a->i[i];
1672     n      = a->i[i+1] - a->i[i];
1673     alpha1 = x[9*i];
1674     alpha2 = x[9*i+1];
1675     alpha3 = x[9*i+2];
1676     alpha4 = x[9*i+3];
1677     alpha5 = x[9*i+4];
1678     alpha6 = x[9*i+5];
1679     alpha7 = x[9*i+6];
1680     alpha8 = x[9*i+7];
1681     alpha9 = x[9*i+8];
1682     while (n-->0) {
1683       y[9*(*idx)]   += alpha1*(*v);
1684       y[9*(*idx)+1] += alpha2*(*v);
1685       y[9*(*idx)+2] += alpha3*(*v);
1686       y[9*(*idx)+3] += alpha4*(*v);
1687       y[9*(*idx)+4] += alpha5*(*v);
1688       y[9*(*idx)+5] += alpha6*(*v);
1689       y[9*(*idx)+6] += alpha7*(*v);
1690       y[9*(*idx)+7] += alpha8*(*v);
1691       y[9*(*idx)+8] += alpha9*(*v);
1692       idx++; v++;
1693     }
1694   }
1695   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1696   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1697   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1702 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1703 {
1704   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1705   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1706   const PetscScalar *x,*v;
1707   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1708   PetscErrorCode    ierr;
1709   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1710   PetscInt          nonzerorow=0,n,i,jrow,j;
1711 
1712   PetscFunctionBegin;
1713   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1714   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1715   idx  = a->j;
1716   v    = a->a;
1717   ii   = a->i;
1718 
1719   for (i=0; i<m; i++) {
1720     jrow  = ii[i];
1721     n     = ii[i+1] - jrow;
1722     sum1  = 0.0;
1723     sum2  = 0.0;
1724     sum3  = 0.0;
1725     sum4  = 0.0;
1726     sum5  = 0.0;
1727     sum6  = 0.0;
1728     sum7  = 0.0;
1729     sum8  = 0.0;
1730     sum9  = 0.0;
1731     sum10 = 0.0;
1732 
1733     nonzerorow += (n>0);
1734     for (j=0; j<n; j++) {
1735       sum1  += v[jrow]*x[10*idx[jrow]];
1736       sum2  += v[jrow]*x[10*idx[jrow]+1];
1737       sum3  += v[jrow]*x[10*idx[jrow]+2];
1738       sum4  += v[jrow]*x[10*idx[jrow]+3];
1739       sum5  += v[jrow]*x[10*idx[jrow]+4];
1740       sum6  += v[jrow]*x[10*idx[jrow]+5];
1741       sum7  += v[jrow]*x[10*idx[jrow]+6];
1742       sum8  += v[jrow]*x[10*idx[jrow]+7];
1743       sum9  += v[jrow]*x[10*idx[jrow]+8];
1744       sum10 += v[jrow]*x[10*idx[jrow]+9];
1745       jrow++;
1746     }
1747     y[10*i]   = sum1;
1748     y[10*i+1] = sum2;
1749     y[10*i+2] = sum3;
1750     y[10*i+3] = sum4;
1751     y[10*i+4] = sum5;
1752     y[10*i+5] = sum6;
1753     y[10*i+6] = sum7;
1754     y[10*i+7] = sum8;
1755     y[10*i+8] = sum9;
1756     y[10*i+9] = sum10;
1757   }
1758 
1759   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1760   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1761   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1767 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1768 {
1769   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1770   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1771   const PetscScalar *x,*v;
1772   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1773   PetscErrorCode    ierr;
1774   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1775   PetscInt          n,i,jrow,j;
1776 
1777   PetscFunctionBegin;
1778   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1779   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1780   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1781   idx  = a->j;
1782   v    = a->a;
1783   ii   = a->i;
1784 
1785   for (i=0; i<m; i++) {
1786     jrow  = ii[i];
1787     n     = ii[i+1] - jrow;
1788     sum1  = 0.0;
1789     sum2  = 0.0;
1790     sum3  = 0.0;
1791     sum4  = 0.0;
1792     sum5  = 0.0;
1793     sum6  = 0.0;
1794     sum7  = 0.0;
1795     sum8  = 0.0;
1796     sum9  = 0.0;
1797     sum10 = 0.0;
1798     for (j=0; j<n; j++) {
1799       sum1  += v[jrow]*x[10*idx[jrow]];
1800       sum2  += v[jrow]*x[10*idx[jrow]+1];
1801       sum3  += v[jrow]*x[10*idx[jrow]+2];
1802       sum4  += v[jrow]*x[10*idx[jrow]+3];
1803       sum5  += v[jrow]*x[10*idx[jrow]+4];
1804       sum6  += v[jrow]*x[10*idx[jrow]+5];
1805       sum7  += v[jrow]*x[10*idx[jrow]+6];
1806       sum8  += v[jrow]*x[10*idx[jrow]+7];
1807       sum9  += v[jrow]*x[10*idx[jrow]+8];
1808       sum10 += v[jrow]*x[10*idx[jrow]+9];
1809       jrow++;
1810     }
1811     y[10*i]   += sum1;
1812     y[10*i+1] += sum2;
1813     y[10*i+2] += sum3;
1814     y[10*i+3] += sum4;
1815     y[10*i+4] += sum5;
1816     y[10*i+5] += sum6;
1817     y[10*i+6] += sum7;
1818     y[10*i+7] += sum8;
1819     y[10*i+8] += sum9;
1820     y[10*i+9] += sum10;
1821   }
1822 
1823   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1824   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1825   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1831 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1832 {
1833   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1834   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1835   const PetscScalar *x,*v;
1836   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1837   PetscErrorCode    ierr;
1838   const PetscInt    m = b->AIJ->rmap->n,*idx;
1839   PetscInt          n,i;
1840 
1841   PetscFunctionBegin;
1842   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1843   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1844   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1845 
1846   for (i=0; i<m; i++) {
1847     idx     = a->j + a->i[i];
1848     v       = a->a + a->i[i];
1849     n       = a->i[i+1] - a->i[i];
1850     alpha1  = x[10*i];
1851     alpha2  = x[10*i+1];
1852     alpha3  = x[10*i+2];
1853     alpha4  = x[10*i+3];
1854     alpha5  = x[10*i+4];
1855     alpha6  = x[10*i+5];
1856     alpha7  = x[10*i+6];
1857     alpha8  = x[10*i+7];
1858     alpha9  = x[10*i+8];
1859     alpha10 = x[10*i+9];
1860     while (n-->0) {
1861       y[10*(*idx)]   += alpha1*(*v);
1862       y[10*(*idx)+1] += alpha2*(*v);
1863       y[10*(*idx)+2] += alpha3*(*v);
1864       y[10*(*idx)+3] += alpha4*(*v);
1865       y[10*(*idx)+4] += alpha5*(*v);
1866       y[10*(*idx)+5] += alpha6*(*v);
1867       y[10*(*idx)+6] += alpha7*(*v);
1868       y[10*(*idx)+7] += alpha8*(*v);
1869       y[10*(*idx)+8] += alpha9*(*v);
1870       y[10*(*idx)+9] += alpha10*(*v);
1871       idx++; v++;
1872     }
1873   }
1874   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1875   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1876   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1882 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1883 {
1884   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1885   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1886   const PetscScalar *x,*v;
1887   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1888   PetscErrorCode    ierr;
1889   const PetscInt    m = b->AIJ->rmap->n,*idx;
1890   PetscInt          n,i;
1891 
1892   PetscFunctionBegin;
1893   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1894   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1895   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1896   for (i=0; i<m; i++) {
1897     idx     = a->j + a->i[i];
1898     v       = a->a + a->i[i];
1899     n       = a->i[i+1] - a->i[i];
1900     alpha1  = x[10*i];
1901     alpha2  = x[10*i+1];
1902     alpha3  = x[10*i+2];
1903     alpha4  = x[10*i+3];
1904     alpha5  = x[10*i+4];
1905     alpha6  = x[10*i+5];
1906     alpha7  = x[10*i+6];
1907     alpha8  = x[10*i+7];
1908     alpha9  = x[10*i+8];
1909     alpha10 = x[10*i+9];
1910     while (n-->0) {
1911       y[10*(*idx)]   += alpha1*(*v);
1912       y[10*(*idx)+1] += alpha2*(*v);
1913       y[10*(*idx)+2] += alpha3*(*v);
1914       y[10*(*idx)+3] += alpha4*(*v);
1915       y[10*(*idx)+4] += alpha5*(*v);
1916       y[10*(*idx)+5] += alpha6*(*v);
1917       y[10*(*idx)+6] += alpha7*(*v);
1918       y[10*(*idx)+7] += alpha8*(*v);
1919       y[10*(*idx)+8] += alpha9*(*v);
1920       y[10*(*idx)+9] += alpha10*(*v);
1921       idx++; v++;
1922     }
1923   }
1924   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1925   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1926   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 
1931 /*--------------------------------------------------------------------------------------------*/
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatMult_SeqMAIJ_11"
1934 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1935 {
1936   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1937   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1938   const PetscScalar *x,*v;
1939   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1940   PetscErrorCode    ierr;
1941   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1942   PetscInt          nonzerorow=0,n,i,jrow,j;
1943 
1944   PetscFunctionBegin;
1945   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1946   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1947   idx  = a->j;
1948   v    = a->a;
1949   ii   = a->i;
1950 
1951   for (i=0; i<m; i++) {
1952     jrow  = ii[i];
1953     n     = ii[i+1] - jrow;
1954     sum1  = 0.0;
1955     sum2  = 0.0;
1956     sum3  = 0.0;
1957     sum4  = 0.0;
1958     sum5  = 0.0;
1959     sum6  = 0.0;
1960     sum7  = 0.0;
1961     sum8  = 0.0;
1962     sum9  = 0.0;
1963     sum10 = 0.0;
1964     sum11 = 0.0;
1965 
1966     nonzerorow += (n>0);
1967     for (j=0; j<n; j++) {
1968       sum1  += v[jrow]*x[11*idx[jrow]];
1969       sum2  += v[jrow]*x[11*idx[jrow]+1];
1970       sum3  += v[jrow]*x[11*idx[jrow]+2];
1971       sum4  += v[jrow]*x[11*idx[jrow]+3];
1972       sum5  += v[jrow]*x[11*idx[jrow]+4];
1973       sum6  += v[jrow]*x[11*idx[jrow]+5];
1974       sum7  += v[jrow]*x[11*idx[jrow]+6];
1975       sum8  += v[jrow]*x[11*idx[jrow]+7];
1976       sum9  += v[jrow]*x[11*idx[jrow]+8];
1977       sum10 += v[jrow]*x[11*idx[jrow]+9];
1978       sum11 += v[jrow]*x[11*idx[jrow]+10];
1979       jrow++;
1980     }
1981     y[11*i]    = sum1;
1982     y[11*i+1]  = sum2;
1983     y[11*i+2]  = sum3;
1984     y[11*i+3]  = sum4;
1985     y[11*i+4]  = sum5;
1986     y[11*i+5]  = sum6;
1987     y[11*i+6]  = sum7;
1988     y[11*i+7]  = sum8;
1989     y[11*i+8]  = sum9;
1990     y[11*i+9]  = sum10;
1991     y[11*i+10] = sum11;
1992   }
1993 
1994   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1995   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1996   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
2002 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2003 {
2004   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2005   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2006   const PetscScalar *x,*v;
2007   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2008   PetscErrorCode    ierr;
2009   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2010   PetscInt          n,i,jrow,j;
2011 
2012   PetscFunctionBegin;
2013   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2014   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2015   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2016   idx  = a->j;
2017   v    = a->a;
2018   ii   = a->i;
2019 
2020   for (i=0; i<m; i++) {
2021     jrow  = ii[i];
2022     n     = ii[i+1] - jrow;
2023     sum1  = 0.0;
2024     sum2  = 0.0;
2025     sum3  = 0.0;
2026     sum4  = 0.0;
2027     sum5  = 0.0;
2028     sum6  = 0.0;
2029     sum7  = 0.0;
2030     sum8  = 0.0;
2031     sum9  = 0.0;
2032     sum10 = 0.0;
2033     sum11 = 0.0;
2034     for (j=0; j<n; j++) {
2035       sum1  += v[jrow]*x[11*idx[jrow]];
2036       sum2  += v[jrow]*x[11*idx[jrow]+1];
2037       sum3  += v[jrow]*x[11*idx[jrow]+2];
2038       sum4  += v[jrow]*x[11*idx[jrow]+3];
2039       sum5  += v[jrow]*x[11*idx[jrow]+4];
2040       sum6  += v[jrow]*x[11*idx[jrow]+5];
2041       sum7  += v[jrow]*x[11*idx[jrow]+6];
2042       sum8  += v[jrow]*x[11*idx[jrow]+7];
2043       sum9  += v[jrow]*x[11*idx[jrow]+8];
2044       sum10 += v[jrow]*x[11*idx[jrow]+9];
2045       sum11 += v[jrow]*x[11*idx[jrow]+10];
2046       jrow++;
2047     }
2048     y[11*i]    += sum1;
2049     y[11*i+1]  += sum2;
2050     y[11*i+2]  += sum3;
2051     y[11*i+3]  += sum4;
2052     y[11*i+4]  += sum5;
2053     y[11*i+5]  += sum6;
2054     y[11*i+6]  += sum7;
2055     y[11*i+7]  += sum8;
2056     y[11*i+8]  += sum9;
2057     y[11*i+9]  += sum10;
2058     y[11*i+10] += sum11;
2059   }
2060 
2061   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2062   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2063   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2069 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2070 {
2071   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2072   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2073   const PetscScalar *x,*v;
2074   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2075   PetscErrorCode    ierr;
2076   const PetscInt    m = b->AIJ->rmap->n,*idx;
2077   PetscInt          n,i;
2078 
2079   PetscFunctionBegin;
2080   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2081   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2082   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2083 
2084   for (i=0; i<m; i++) {
2085     idx     = a->j + a->i[i];
2086     v       = a->a + a->i[i];
2087     n       = a->i[i+1] - a->i[i];
2088     alpha1  = x[11*i];
2089     alpha2  = x[11*i+1];
2090     alpha3  = x[11*i+2];
2091     alpha4  = x[11*i+3];
2092     alpha5  = x[11*i+4];
2093     alpha6  = x[11*i+5];
2094     alpha7  = x[11*i+6];
2095     alpha8  = x[11*i+7];
2096     alpha9  = x[11*i+8];
2097     alpha10 = x[11*i+9];
2098     alpha11 = x[11*i+10];
2099     while (n-->0) {
2100       y[11*(*idx)]    += alpha1*(*v);
2101       y[11*(*idx)+1]  += alpha2*(*v);
2102       y[11*(*idx)+2]  += alpha3*(*v);
2103       y[11*(*idx)+3]  += alpha4*(*v);
2104       y[11*(*idx)+4]  += alpha5*(*v);
2105       y[11*(*idx)+5]  += alpha6*(*v);
2106       y[11*(*idx)+6]  += alpha7*(*v);
2107       y[11*(*idx)+7]  += alpha8*(*v);
2108       y[11*(*idx)+8]  += alpha9*(*v);
2109       y[11*(*idx)+9]  += alpha10*(*v);
2110       y[11*(*idx)+10] += alpha11*(*v);
2111       idx++; v++;
2112     }
2113   }
2114   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2115   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2116   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 #undef __FUNCT__
2121 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2122 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2123 {
2124   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2125   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2126   const PetscScalar *x,*v;
2127   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2128   PetscErrorCode    ierr;
2129   const PetscInt    m = b->AIJ->rmap->n,*idx;
2130   PetscInt          n,i;
2131 
2132   PetscFunctionBegin;
2133   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2134   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2135   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2136   for (i=0; i<m; i++) {
2137     idx     = a->j + a->i[i];
2138     v       = a->a + a->i[i];
2139     n       = a->i[i+1] - a->i[i];
2140     alpha1  = x[11*i];
2141     alpha2  = x[11*i+1];
2142     alpha3  = x[11*i+2];
2143     alpha4  = x[11*i+3];
2144     alpha5  = x[11*i+4];
2145     alpha6  = x[11*i+5];
2146     alpha7  = x[11*i+6];
2147     alpha8  = x[11*i+7];
2148     alpha9  = x[11*i+8];
2149     alpha10 = x[11*i+9];
2150     alpha11 = x[11*i+10];
2151     while (n-->0) {
2152       y[11*(*idx)]    += alpha1*(*v);
2153       y[11*(*idx)+1]  += alpha2*(*v);
2154       y[11*(*idx)+2]  += alpha3*(*v);
2155       y[11*(*idx)+3]  += alpha4*(*v);
2156       y[11*(*idx)+4]  += alpha5*(*v);
2157       y[11*(*idx)+5]  += alpha6*(*v);
2158       y[11*(*idx)+6]  += alpha7*(*v);
2159       y[11*(*idx)+7]  += alpha8*(*v);
2160       y[11*(*idx)+8]  += alpha9*(*v);
2161       y[11*(*idx)+9]  += alpha10*(*v);
2162       y[11*(*idx)+10] += alpha11*(*v);
2163       idx++; v++;
2164     }
2165   }
2166   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2167   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2168   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 
2173 /*--------------------------------------------------------------------------------------------*/
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatMult_SeqMAIJ_16"
2176 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2177 {
2178   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2179   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2180   const PetscScalar *x,*v;
2181   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2182   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2183   PetscErrorCode    ierr;
2184   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2185   PetscInt          nonzerorow=0,n,i,jrow,j;
2186 
2187   PetscFunctionBegin;
2188   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2189   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2190   idx  = a->j;
2191   v    = a->a;
2192   ii   = a->i;
2193 
2194   for (i=0; i<m; i++) {
2195     jrow  = ii[i];
2196     n     = ii[i+1] - jrow;
2197     sum1  = 0.0;
2198     sum2  = 0.0;
2199     sum3  = 0.0;
2200     sum4  = 0.0;
2201     sum5  = 0.0;
2202     sum6  = 0.0;
2203     sum7  = 0.0;
2204     sum8  = 0.0;
2205     sum9  = 0.0;
2206     sum10 = 0.0;
2207     sum11 = 0.0;
2208     sum12 = 0.0;
2209     sum13 = 0.0;
2210     sum14 = 0.0;
2211     sum15 = 0.0;
2212     sum16 = 0.0;
2213 
2214     nonzerorow += (n>0);
2215     for (j=0; j<n; j++) {
2216       sum1  += v[jrow]*x[16*idx[jrow]];
2217       sum2  += v[jrow]*x[16*idx[jrow]+1];
2218       sum3  += v[jrow]*x[16*idx[jrow]+2];
2219       sum4  += v[jrow]*x[16*idx[jrow]+3];
2220       sum5  += v[jrow]*x[16*idx[jrow]+4];
2221       sum6  += v[jrow]*x[16*idx[jrow]+5];
2222       sum7  += v[jrow]*x[16*idx[jrow]+6];
2223       sum8  += v[jrow]*x[16*idx[jrow]+7];
2224       sum9  += v[jrow]*x[16*idx[jrow]+8];
2225       sum10 += v[jrow]*x[16*idx[jrow]+9];
2226       sum11 += v[jrow]*x[16*idx[jrow]+10];
2227       sum12 += v[jrow]*x[16*idx[jrow]+11];
2228       sum13 += v[jrow]*x[16*idx[jrow]+12];
2229       sum14 += v[jrow]*x[16*idx[jrow]+13];
2230       sum15 += v[jrow]*x[16*idx[jrow]+14];
2231       sum16 += v[jrow]*x[16*idx[jrow]+15];
2232       jrow++;
2233     }
2234     y[16*i]    = sum1;
2235     y[16*i+1]  = sum2;
2236     y[16*i+2]  = sum3;
2237     y[16*i+3]  = sum4;
2238     y[16*i+4]  = sum5;
2239     y[16*i+5]  = sum6;
2240     y[16*i+6]  = sum7;
2241     y[16*i+7]  = sum8;
2242     y[16*i+8]  = sum9;
2243     y[16*i+9]  = sum10;
2244     y[16*i+10] = sum11;
2245     y[16*i+11] = sum12;
2246     y[16*i+12] = sum13;
2247     y[16*i+13] = sum14;
2248     y[16*i+14] = sum15;
2249     y[16*i+15] = sum16;
2250   }
2251 
2252   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2253   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2254   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2260 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2261 {
2262   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2263   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2264   const PetscScalar *x,*v;
2265   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2266   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2267   PetscErrorCode    ierr;
2268   const PetscInt    m = b->AIJ->rmap->n,*idx;
2269   PetscInt          n,i;
2270 
2271   PetscFunctionBegin;
2272   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2273   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2274   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2275 
2276   for (i=0; i<m; i++) {
2277     idx     = a->j + a->i[i];
2278     v       = a->a + a->i[i];
2279     n       = a->i[i+1] - a->i[i];
2280     alpha1  = x[16*i];
2281     alpha2  = x[16*i+1];
2282     alpha3  = x[16*i+2];
2283     alpha4  = x[16*i+3];
2284     alpha5  = x[16*i+4];
2285     alpha6  = x[16*i+5];
2286     alpha7  = x[16*i+6];
2287     alpha8  = x[16*i+7];
2288     alpha9  = x[16*i+8];
2289     alpha10 = x[16*i+9];
2290     alpha11 = x[16*i+10];
2291     alpha12 = x[16*i+11];
2292     alpha13 = x[16*i+12];
2293     alpha14 = x[16*i+13];
2294     alpha15 = x[16*i+14];
2295     alpha16 = x[16*i+15];
2296     while (n-->0) {
2297       y[16*(*idx)]    += alpha1*(*v);
2298       y[16*(*idx)+1]  += alpha2*(*v);
2299       y[16*(*idx)+2]  += alpha3*(*v);
2300       y[16*(*idx)+3]  += alpha4*(*v);
2301       y[16*(*idx)+4]  += alpha5*(*v);
2302       y[16*(*idx)+5]  += alpha6*(*v);
2303       y[16*(*idx)+6]  += alpha7*(*v);
2304       y[16*(*idx)+7]  += alpha8*(*v);
2305       y[16*(*idx)+8]  += alpha9*(*v);
2306       y[16*(*idx)+9]  += alpha10*(*v);
2307       y[16*(*idx)+10] += alpha11*(*v);
2308       y[16*(*idx)+11] += alpha12*(*v);
2309       y[16*(*idx)+12] += alpha13*(*v);
2310       y[16*(*idx)+13] += alpha14*(*v);
2311       y[16*(*idx)+14] += alpha15*(*v);
2312       y[16*(*idx)+15] += alpha16*(*v);
2313       idx++; v++;
2314     }
2315   }
2316   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2317   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2318   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2324 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2325 {
2326   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2327   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2328   const PetscScalar *x,*v;
2329   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2330   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2331   PetscErrorCode    ierr;
2332   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2333   PetscInt          n,i,jrow,j;
2334 
2335   PetscFunctionBegin;
2336   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2337   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2338   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2339   idx  = a->j;
2340   v    = a->a;
2341   ii   = a->i;
2342 
2343   for (i=0; i<m; i++) {
2344     jrow  = ii[i];
2345     n     = ii[i+1] - jrow;
2346     sum1  = 0.0;
2347     sum2  = 0.0;
2348     sum3  = 0.0;
2349     sum4  = 0.0;
2350     sum5  = 0.0;
2351     sum6  = 0.0;
2352     sum7  = 0.0;
2353     sum8  = 0.0;
2354     sum9  = 0.0;
2355     sum10 = 0.0;
2356     sum11 = 0.0;
2357     sum12 = 0.0;
2358     sum13 = 0.0;
2359     sum14 = 0.0;
2360     sum15 = 0.0;
2361     sum16 = 0.0;
2362     for (j=0; j<n; j++) {
2363       sum1  += v[jrow]*x[16*idx[jrow]];
2364       sum2  += v[jrow]*x[16*idx[jrow]+1];
2365       sum3  += v[jrow]*x[16*idx[jrow]+2];
2366       sum4  += v[jrow]*x[16*idx[jrow]+3];
2367       sum5  += v[jrow]*x[16*idx[jrow]+4];
2368       sum6  += v[jrow]*x[16*idx[jrow]+5];
2369       sum7  += v[jrow]*x[16*idx[jrow]+6];
2370       sum8  += v[jrow]*x[16*idx[jrow]+7];
2371       sum9  += v[jrow]*x[16*idx[jrow]+8];
2372       sum10 += v[jrow]*x[16*idx[jrow]+9];
2373       sum11 += v[jrow]*x[16*idx[jrow]+10];
2374       sum12 += v[jrow]*x[16*idx[jrow]+11];
2375       sum13 += v[jrow]*x[16*idx[jrow]+12];
2376       sum14 += v[jrow]*x[16*idx[jrow]+13];
2377       sum15 += v[jrow]*x[16*idx[jrow]+14];
2378       sum16 += v[jrow]*x[16*idx[jrow]+15];
2379       jrow++;
2380     }
2381     y[16*i]    += sum1;
2382     y[16*i+1]  += sum2;
2383     y[16*i+2]  += sum3;
2384     y[16*i+3]  += sum4;
2385     y[16*i+4]  += sum5;
2386     y[16*i+5]  += sum6;
2387     y[16*i+6]  += sum7;
2388     y[16*i+7]  += sum8;
2389     y[16*i+8]  += sum9;
2390     y[16*i+9]  += sum10;
2391     y[16*i+10] += sum11;
2392     y[16*i+11] += sum12;
2393     y[16*i+12] += sum13;
2394     y[16*i+13] += sum14;
2395     y[16*i+14] += sum15;
2396     y[16*i+15] += sum16;
2397   }
2398 
2399   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2400   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2401   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2407 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2408 {
2409   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2410   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2411   const PetscScalar *x,*v;
2412   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2413   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2414   PetscErrorCode    ierr;
2415   const PetscInt    m = b->AIJ->rmap->n,*idx;
2416   PetscInt          n,i;
2417 
2418   PetscFunctionBegin;
2419   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2420   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2421   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2422   for (i=0; i<m; i++) {
2423     idx     = a->j + a->i[i];
2424     v       = a->a + a->i[i];
2425     n       = a->i[i+1] - a->i[i];
2426     alpha1  = x[16*i];
2427     alpha2  = x[16*i+1];
2428     alpha3  = x[16*i+2];
2429     alpha4  = x[16*i+3];
2430     alpha5  = x[16*i+4];
2431     alpha6  = x[16*i+5];
2432     alpha7  = x[16*i+6];
2433     alpha8  = x[16*i+7];
2434     alpha9  = x[16*i+8];
2435     alpha10 = x[16*i+9];
2436     alpha11 = x[16*i+10];
2437     alpha12 = x[16*i+11];
2438     alpha13 = x[16*i+12];
2439     alpha14 = x[16*i+13];
2440     alpha15 = x[16*i+14];
2441     alpha16 = x[16*i+15];
2442     while (n-->0) {
2443       y[16*(*idx)]    += alpha1*(*v);
2444       y[16*(*idx)+1]  += alpha2*(*v);
2445       y[16*(*idx)+2]  += alpha3*(*v);
2446       y[16*(*idx)+3]  += alpha4*(*v);
2447       y[16*(*idx)+4]  += alpha5*(*v);
2448       y[16*(*idx)+5]  += alpha6*(*v);
2449       y[16*(*idx)+6]  += alpha7*(*v);
2450       y[16*(*idx)+7]  += alpha8*(*v);
2451       y[16*(*idx)+8]  += alpha9*(*v);
2452       y[16*(*idx)+9]  += alpha10*(*v);
2453       y[16*(*idx)+10] += alpha11*(*v);
2454       y[16*(*idx)+11] += alpha12*(*v);
2455       y[16*(*idx)+12] += alpha13*(*v);
2456       y[16*(*idx)+13] += alpha14*(*v);
2457       y[16*(*idx)+14] += alpha15*(*v);
2458       y[16*(*idx)+15] += alpha16*(*v);
2459       idx++; v++;
2460     }
2461   }
2462   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2463   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 /*--------------------------------------------------------------------------------------------*/
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2471 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2472 {
2473   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2474   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2475   const PetscScalar *x,*v;
2476   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2477   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2478   PetscErrorCode    ierr;
2479   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2480   PetscInt          nonzerorow=0,n,i,jrow,j;
2481 
2482   PetscFunctionBegin;
2483   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2484   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2485   idx  = a->j;
2486   v    = a->a;
2487   ii   = a->i;
2488 
2489   for (i=0; i<m; i++) {
2490     jrow  = ii[i];
2491     n     = ii[i+1] - jrow;
2492     sum1  = 0.0;
2493     sum2  = 0.0;
2494     sum3  = 0.0;
2495     sum4  = 0.0;
2496     sum5  = 0.0;
2497     sum6  = 0.0;
2498     sum7  = 0.0;
2499     sum8  = 0.0;
2500     sum9  = 0.0;
2501     sum10 = 0.0;
2502     sum11 = 0.0;
2503     sum12 = 0.0;
2504     sum13 = 0.0;
2505     sum14 = 0.0;
2506     sum15 = 0.0;
2507     sum16 = 0.0;
2508     sum17 = 0.0;
2509     sum18 = 0.0;
2510 
2511     nonzerorow += (n>0);
2512     for (j=0; j<n; j++) {
2513       sum1  += v[jrow]*x[18*idx[jrow]];
2514       sum2  += v[jrow]*x[18*idx[jrow]+1];
2515       sum3  += v[jrow]*x[18*idx[jrow]+2];
2516       sum4  += v[jrow]*x[18*idx[jrow]+3];
2517       sum5  += v[jrow]*x[18*idx[jrow]+4];
2518       sum6  += v[jrow]*x[18*idx[jrow]+5];
2519       sum7  += v[jrow]*x[18*idx[jrow]+6];
2520       sum8  += v[jrow]*x[18*idx[jrow]+7];
2521       sum9  += v[jrow]*x[18*idx[jrow]+8];
2522       sum10 += v[jrow]*x[18*idx[jrow]+9];
2523       sum11 += v[jrow]*x[18*idx[jrow]+10];
2524       sum12 += v[jrow]*x[18*idx[jrow]+11];
2525       sum13 += v[jrow]*x[18*idx[jrow]+12];
2526       sum14 += v[jrow]*x[18*idx[jrow]+13];
2527       sum15 += v[jrow]*x[18*idx[jrow]+14];
2528       sum16 += v[jrow]*x[18*idx[jrow]+15];
2529       sum17 += v[jrow]*x[18*idx[jrow]+16];
2530       sum18 += v[jrow]*x[18*idx[jrow]+17];
2531       jrow++;
2532     }
2533     y[18*i]    = sum1;
2534     y[18*i+1]  = sum2;
2535     y[18*i+2]  = sum3;
2536     y[18*i+3]  = sum4;
2537     y[18*i+4]  = sum5;
2538     y[18*i+5]  = sum6;
2539     y[18*i+6]  = sum7;
2540     y[18*i+7]  = sum8;
2541     y[18*i+8]  = sum9;
2542     y[18*i+9]  = sum10;
2543     y[18*i+10] = sum11;
2544     y[18*i+11] = sum12;
2545     y[18*i+12] = sum13;
2546     y[18*i+13] = sum14;
2547     y[18*i+14] = sum15;
2548     y[18*i+15] = sum16;
2549     y[18*i+16] = sum17;
2550     y[18*i+17] = sum18;
2551   }
2552 
2553   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2554   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2555   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2561 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2562 {
2563   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2564   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2565   const PetscScalar *x,*v;
2566   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2567   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2568   PetscErrorCode    ierr;
2569   const PetscInt    m = b->AIJ->rmap->n,*idx;
2570   PetscInt          n,i;
2571 
2572   PetscFunctionBegin;
2573   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2574   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2575   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2576 
2577   for (i=0; i<m; i++) {
2578     idx     = a->j + a->i[i];
2579     v       = a->a + a->i[i];
2580     n       = a->i[i+1] - a->i[i];
2581     alpha1  = x[18*i];
2582     alpha2  = x[18*i+1];
2583     alpha3  = x[18*i+2];
2584     alpha4  = x[18*i+3];
2585     alpha5  = x[18*i+4];
2586     alpha6  = x[18*i+5];
2587     alpha7  = x[18*i+6];
2588     alpha8  = x[18*i+7];
2589     alpha9  = x[18*i+8];
2590     alpha10 = x[18*i+9];
2591     alpha11 = x[18*i+10];
2592     alpha12 = x[18*i+11];
2593     alpha13 = x[18*i+12];
2594     alpha14 = x[18*i+13];
2595     alpha15 = x[18*i+14];
2596     alpha16 = x[18*i+15];
2597     alpha17 = x[18*i+16];
2598     alpha18 = x[18*i+17];
2599     while (n-->0) {
2600       y[18*(*idx)]    += alpha1*(*v);
2601       y[18*(*idx)+1]  += alpha2*(*v);
2602       y[18*(*idx)+2]  += alpha3*(*v);
2603       y[18*(*idx)+3]  += alpha4*(*v);
2604       y[18*(*idx)+4]  += alpha5*(*v);
2605       y[18*(*idx)+5]  += alpha6*(*v);
2606       y[18*(*idx)+6]  += alpha7*(*v);
2607       y[18*(*idx)+7]  += alpha8*(*v);
2608       y[18*(*idx)+8]  += alpha9*(*v);
2609       y[18*(*idx)+9]  += alpha10*(*v);
2610       y[18*(*idx)+10] += alpha11*(*v);
2611       y[18*(*idx)+11] += alpha12*(*v);
2612       y[18*(*idx)+12] += alpha13*(*v);
2613       y[18*(*idx)+13] += alpha14*(*v);
2614       y[18*(*idx)+14] += alpha15*(*v);
2615       y[18*(*idx)+15] += alpha16*(*v);
2616       y[18*(*idx)+16] += alpha17*(*v);
2617       y[18*(*idx)+17] += alpha18*(*v);
2618       idx++; v++;
2619     }
2620   }
2621   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2622   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2623   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 #undef __FUNCT__
2628 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2629 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2630 {
2631   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2632   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2633   const PetscScalar *x,*v;
2634   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2635   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2636   PetscErrorCode    ierr;
2637   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2638   PetscInt          n,i,jrow,j;
2639 
2640   PetscFunctionBegin;
2641   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2642   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2643   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2644   idx  = a->j;
2645   v    = a->a;
2646   ii   = a->i;
2647 
2648   for (i=0; i<m; i++) {
2649     jrow  = ii[i];
2650     n     = ii[i+1] - jrow;
2651     sum1  = 0.0;
2652     sum2  = 0.0;
2653     sum3  = 0.0;
2654     sum4  = 0.0;
2655     sum5  = 0.0;
2656     sum6  = 0.0;
2657     sum7  = 0.0;
2658     sum8  = 0.0;
2659     sum9  = 0.0;
2660     sum10 = 0.0;
2661     sum11 = 0.0;
2662     sum12 = 0.0;
2663     sum13 = 0.0;
2664     sum14 = 0.0;
2665     sum15 = 0.0;
2666     sum16 = 0.0;
2667     sum17 = 0.0;
2668     sum18 = 0.0;
2669     for (j=0; j<n; j++) {
2670       sum1  += v[jrow]*x[18*idx[jrow]];
2671       sum2  += v[jrow]*x[18*idx[jrow]+1];
2672       sum3  += v[jrow]*x[18*idx[jrow]+2];
2673       sum4  += v[jrow]*x[18*idx[jrow]+3];
2674       sum5  += v[jrow]*x[18*idx[jrow]+4];
2675       sum6  += v[jrow]*x[18*idx[jrow]+5];
2676       sum7  += v[jrow]*x[18*idx[jrow]+6];
2677       sum8  += v[jrow]*x[18*idx[jrow]+7];
2678       sum9  += v[jrow]*x[18*idx[jrow]+8];
2679       sum10 += v[jrow]*x[18*idx[jrow]+9];
2680       sum11 += v[jrow]*x[18*idx[jrow]+10];
2681       sum12 += v[jrow]*x[18*idx[jrow]+11];
2682       sum13 += v[jrow]*x[18*idx[jrow]+12];
2683       sum14 += v[jrow]*x[18*idx[jrow]+13];
2684       sum15 += v[jrow]*x[18*idx[jrow]+14];
2685       sum16 += v[jrow]*x[18*idx[jrow]+15];
2686       sum17 += v[jrow]*x[18*idx[jrow]+16];
2687       sum18 += v[jrow]*x[18*idx[jrow]+17];
2688       jrow++;
2689     }
2690     y[18*i]    += sum1;
2691     y[18*i+1]  += sum2;
2692     y[18*i+2]  += sum3;
2693     y[18*i+3]  += sum4;
2694     y[18*i+4]  += sum5;
2695     y[18*i+5]  += sum6;
2696     y[18*i+6]  += sum7;
2697     y[18*i+7]  += sum8;
2698     y[18*i+8]  += sum9;
2699     y[18*i+9]  += sum10;
2700     y[18*i+10] += sum11;
2701     y[18*i+11] += sum12;
2702     y[18*i+12] += sum13;
2703     y[18*i+13] += sum14;
2704     y[18*i+14] += sum15;
2705     y[18*i+15] += sum16;
2706     y[18*i+16] += sum17;
2707     y[18*i+17] += sum18;
2708   }
2709 
2710   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2711   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2712   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2718 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2719 {
2720   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2721   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2722   const PetscScalar *x,*v;
2723   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2724   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2725   PetscErrorCode    ierr;
2726   const PetscInt    m = b->AIJ->rmap->n,*idx;
2727   PetscInt          n,i;
2728 
2729   PetscFunctionBegin;
2730   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2731   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2732   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2733   for (i=0; i<m; i++) {
2734     idx     = a->j + a->i[i];
2735     v       = a->a + a->i[i];
2736     n       = a->i[i+1] - a->i[i];
2737     alpha1  = x[18*i];
2738     alpha2  = x[18*i+1];
2739     alpha3  = x[18*i+2];
2740     alpha4  = x[18*i+3];
2741     alpha5  = x[18*i+4];
2742     alpha6  = x[18*i+5];
2743     alpha7  = x[18*i+6];
2744     alpha8  = x[18*i+7];
2745     alpha9  = x[18*i+8];
2746     alpha10 = x[18*i+9];
2747     alpha11 = x[18*i+10];
2748     alpha12 = x[18*i+11];
2749     alpha13 = x[18*i+12];
2750     alpha14 = x[18*i+13];
2751     alpha15 = x[18*i+14];
2752     alpha16 = x[18*i+15];
2753     alpha17 = x[18*i+16];
2754     alpha18 = x[18*i+17];
2755     while (n-->0) {
2756       y[18*(*idx)]    += alpha1*(*v);
2757       y[18*(*idx)+1]  += alpha2*(*v);
2758       y[18*(*idx)+2]  += alpha3*(*v);
2759       y[18*(*idx)+3]  += alpha4*(*v);
2760       y[18*(*idx)+4]  += alpha5*(*v);
2761       y[18*(*idx)+5]  += alpha6*(*v);
2762       y[18*(*idx)+6]  += alpha7*(*v);
2763       y[18*(*idx)+7]  += alpha8*(*v);
2764       y[18*(*idx)+8]  += alpha9*(*v);
2765       y[18*(*idx)+9]  += alpha10*(*v);
2766       y[18*(*idx)+10] += alpha11*(*v);
2767       y[18*(*idx)+11] += alpha12*(*v);
2768       y[18*(*idx)+12] += alpha13*(*v);
2769       y[18*(*idx)+13] += alpha14*(*v);
2770       y[18*(*idx)+14] += alpha15*(*v);
2771       y[18*(*idx)+15] += alpha16*(*v);
2772       y[18*(*idx)+16] += alpha17*(*v);
2773       y[18*(*idx)+17] += alpha18*(*v);
2774       idx++; v++;
2775     }
2776   }
2777   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2778   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2779   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatMult_SeqMAIJ_N"
2785 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2786 {
2787   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2788   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2789   const PetscScalar *x,*v;
2790   PetscScalar       *y,*sums;
2791   PetscErrorCode    ierr;
2792   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2793   PetscInt          n,i,jrow,j,dof = b->dof,k;
2794 
2795   PetscFunctionBegin;
2796   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2797   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2798   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2799   idx  = a->j;
2800   v    = a->a;
2801   ii   = a->i;
2802 
2803   for (i=0; i<m; i++) {
2804     jrow = ii[i];
2805     n    = ii[i+1] - jrow;
2806     sums = y + dof*i;
2807     for (j=0; j<n; j++) {
2808       for (k=0; k<dof; k++) {
2809         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2810       }
2811       jrow++;
2812     }
2813   }
2814 
2815   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2816   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2817   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 #undef __FUNCT__
2822 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
2823 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2824 {
2825   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2826   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2827   const PetscScalar *x,*v;
2828   PetscScalar       *y,*sums;
2829   PetscErrorCode    ierr;
2830   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2831   PetscInt          n,i,jrow,j,dof = b->dof,k;
2832 
2833   PetscFunctionBegin;
2834   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2835   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2836   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2837   idx  = a->j;
2838   v    = a->a;
2839   ii   = a->i;
2840 
2841   for (i=0; i<m; i++) {
2842     jrow = ii[i];
2843     n    = ii[i+1] - jrow;
2844     sums = y + dof*i;
2845     for (j=0; j<n; j++) {
2846       for (k=0; k<dof; k++) {
2847         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2848       }
2849       jrow++;
2850     }
2851   }
2852 
2853   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2854   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2855   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
2861 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2862 {
2863   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2864   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2865   const PetscScalar *x,*v,*alpha;
2866   PetscScalar       *y;
2867   PetscErrorCode    ierr;
2868   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2869   PetscInt          n,i,k;
2870 
2871   PetscFunctionBegin;
2872   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2873   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2874   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2875   for (i=0; i<m; i++) {
2876     idx   = a->j + a->i[i];
2877     v     = a->a + a->i[i];
2878     n     = a->i[i+1] - a->i[i];
2879     alpha = x + dof*i;
2880     while (n-->0) {
2881       for (k=0; k<dof; k++) {
2882         y[dof*(*idx)+k] += alpha[k]*(*v);
2883       }
2884       idx++; v++;
2885     }
2886   }
2887   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2888   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2889   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
2895 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2896 {
2897   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2898   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2899   const PetscScalar *x,*v,*alpha;
2900   PetscScalar       *y;
2901   PetscErrorCode    ierr;
2902   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2903   PetscInt          n,i,k;
2904 
2905   PetscFunctionBegin;
2906   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2907   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2908   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2909   for (i=0; i<m; i++) {
2910     idx   = a->j + a->i[i];
2911     v     = a->a + a->i[i];
2912     n     = a->i[i+1] - a->i[i];
2913     alpha = x + dof*i;
2914     while (n-->0) {
2915       for (k=0; k<dof; k++) {
2916         y[dof*(*idx)+k] += alpha[k]*(*v);
2917       }
2918       idx++; v++;
2919     }
2920   }
2921   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2922   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2923   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 /*===================================================================================*/
2928 #undef __FUNCT__
2929 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2930 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2931 {
2932   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2933   PetscErrorCode ierr;
2934 
2935   PetscFunctionBegin;
2936   /* start the scatter */
2937   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2938   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2939   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2940   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 #undef __FUNCT__
2945 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2946 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2947 {
2948   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2949   PetscErrorCode ierr;
2950 
2951   PetscFunctionBegin;
2952   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2953   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2954   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2955   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2956   PetscFunctionReturn(0);
2957 }
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2961 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2962 {
2963   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2964   PetscErrorCode ierr;
2965 
2966   PetscFunctionBegin;
2967   /* start the scatter */
2968   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2969   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2970   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2971   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 #undef __FUNCT__
2976 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2977 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2978 {
2979   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2984   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2985   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2986   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 /* ----------------------------------------------------------------*/
2991 #undef __FUNCT__
2992 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2993 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2994 {
2995   PetscErrorCode     ierr;
2996   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2997   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2998   Mat                P         =pp->AIJ;
2999   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
3000   PetscInt           *pti,*ptj,*ptJ;
3001   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3002   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3003   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
3004   MatScalar          *ca;
3005   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
3006 
3007   PetscFunctionBegin;
3008   /* Get ij structure of P^T */
3009   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3010 
3011   cn = pn*ppdof;
3012   /* Allocate ci array, arrays for fill computation and */
3013   /* free space for accumulating nonzero column info */
3014   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
3015   ci[0] = 0;
3016 
3017   /* Work arrays for rows of P^T*A */
3018   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
3019   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3020   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
3021 
3022   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3023   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3024   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3025   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
3026   current_space = free_space;
3027 
3028   /* Determine symbolic info for each row of C: */
3029   for (i=0; i<pn; i++) {
3030     ptnzi = pti[i+1] - pti[i];
3031     ptJ   = ptj + pti[i];
3032     for (dof=0; dof<ppdof; dof++) {
3033       ptanzi = 0;
3034       /* Determine symbolic row of PtA: */
3035       for (j=0; j<ptnzi; j++) {
3036         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3037         arow = ptJ[j]*ppdof + dof;
3038         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3039         anzj = ai[arow+1] - ai[arow];
3040         ajj  = aj + ai[arow];
3041         for (k=0; k<anzj; k++) {
3042           if (!ptadenserow[ajj[k]]) {
3043             ptadenserow[ajj[k]]    = -1;
3044             ptasparserow[ptanzi++] = ajj[k];
3045           }
3046         }
3047       }
3048       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3049       ptaj = ptasparserow;
3050       cnzi = 0;
3051       for (j=0; j<ptanzi; j++) {
3052         /* Get offset within block of P */
3053         pshift = *ptaj%ppdof;
3054         /* Get block row of P */
3055         prow = (*ptaj++)/ppdof; /* integer division */
3056         /* P has same number of nonzeros per row as the compressed form */
3057         pnzj = pi[prow+1] - pi[prow];
3058         pjj  = pj + pi[prow];
3059         for (k=0;k<pnzj;k++) {
3060           /* Locations in C are shifted by the offset within the block */
3061           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3062           if (!denserow[pjj[k]*ppdof+pshift]) {
3063             denserow[pjj[k]*ppdof+pshift] = -1;
3064             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3065           }
3066         }
3067       }
3068 
3069       /* sort sparserow */
3070       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3071 
3072       /* If free space is not available, make more free space */
3073       /* Double the amount of total space in the list */
3074       if (current_space->local_remaining<cnzi) {
3075         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
3076       }
3077 
3078       /* Copy data into free space, and zero out denserows */
3079       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
3080 
3081       current_space->array           += cnzi;
3082       current_space->local_used      += cnzi;
3083       current_space->local_remaining -= cnzi;
3084 
3085       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3086       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3087 
3088       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3089       /*        For now, we will recompute what is needed. */
3090       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3091     }
3092   }
3093   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3094   /* Allocate space for cj, initialize cj, and */
3095   /* destroy list of free space and other temporary array(s) */
3096   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3097   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3098   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3099 
3100   /* Allocate space for ca */
3101   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
3102 
3103   /* put together the new matrix */
3104   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3105   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
3106 
3107   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3108   /* Since these are PETSc arrays, change flags to free them as necessary. */
3109   c          = (Mat_SeqAIJ*)((*C)->data);
3110   c->free_a  = PETSC_TRUE;
3111   c->free_ij = PETSC_TRUE;
3112   c->nonew   = 0;
3113 
3114   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3115 
3116   /* Clean up. */
3117   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3118   PetscFunctionReturn(0);
3119 }
3120 
3121 #undef __FUNCT__
3122 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
3123 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3124 {
3125   /* This routine requires testing -- first draft only */
3126   PetscErrorCode  ierr;
3127   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3128   Mat             P  =pp->AIJ;
3129   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3130   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3131   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3132   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3133   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3134   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3135   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3136   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3137   MatScalar       *ca=c->a,*caj,*apa;
3138 
3139   PetscFunctionBegin;
3140   /* Allocate temporary array for storage of one row of A*P */
3141   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3142 
3143   /* Clear old values in C */
3144   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3145 
3146   for (i=0; i<am; i++) {
3147     /* Form sparse row of A*P */
3148     anzi  = ai[i+1] - ai[i];
3149     apnzj = 0;
3150     for (j=0; j<anzi; j++) {
3151       /* Get offset within block of P */
3152       pshift = *aj%ppdof;
3153       /* Get block row of P */
3154       prow = *aj++/ppdof;   /* integer division */
3155       pnzj = pi[prow+1] - pi[prow];
3156       pjj  = pj + pi[prow];
3157       paj  = pa + pi[prow];
3158       for (k=0; k<pnzj; k++) {
3159         poffset = pjj[k]*ppdof+pshift;
3160         if (!apjdense[poffset]) {
3161           apjdense[poffset] = -1;
3162           apj[apnzj++]      = poffset;
3163         }
3164         apa[poffset] += (*aa)*paj[k];
3165       }
3166       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3167       aa++;
3168     }
3169 
3170     /* Sort the j index array for quick sparse axpy. */
3171     /* Note: a array does not need sorting as it is in dense storage locations. */
3172     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3173 
3174     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3175     prow    = i/ppdof; /* integer division */
3176     pshift  = i%ppdof;
3177     poffset = pi[prow];
3178     pnzi    = pi[prow+1] - poffset;
3179     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3180     pJ = pj+poffset;
3181     pA = pa+poffset;
3182     for (j=0; j<pnzi; j++) {
3183       crow = (*pJ)*ppdof+pshift;
3184       cjj  = cj + ci[crow];
3185       caj  = ca + ci[crow];
3186       pJ++;
3187       /* Perform sparse axpy operation.  Note cjj includes apj. */
3188       for (k=0,nextap=0; nextap<apnzj; k++) {
3189         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3190       }
3191       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3192       pA++;
3193     }
3194 
3195     /* Zero the current row info for A*P */
3196     for (j=0; j<apnzj; j++) {
3197       apa[apj[j]]      = 0.;
3198       apjdense[apj[j]] = 0;
3199     }
3200   }
3201 
3202   /* Assemble the final matrix and clean up */
3203   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3204   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3205   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3206   PetscFunctionReturn(0);
3207 }
3208 
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ"
3211 PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3212 {
3213   PetscErrorCode ierr;
3214 
3215   PetscFunctionBegin;
3216   if (scall == MAT_INITIAL_MATRIX) {
3217     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3218     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3219     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3220   }
3221   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3222   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3223   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3229 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3230 {
3231   PetscErrorCode ierr;
3232 
3233   PetscFunctionBegin;
3234   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3235   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
3236   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3242 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3243 {
3244   PetscFunctionBegin;
3245   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ"
3251 PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3252 {
3253   PetscErrorCode ierr;
3254 
3255   PetscFunctionBegin;
3256   if (scall == MAT_INITIAL_MATRIX) {
3257     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3258     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3259     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3260   }
3261   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3262   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3263   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3269 PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3270 {
3271   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3272   Mat            a    = b->AIJ,B;
3273   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3274   PetscErrorCode ierr;
3275   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3276   PetscInt       *cols;
3277   PetscScalar    *vals;
3278 
3279   PetscFunctionBegin;
3280   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3281   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3282   for (i=0; i<m; i++) {
3283     nmax = PetscMax(nmax,aij->ilen[i]);
3284     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3285   }
3286   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3287   ierr = PetscFree(ilen);CHKERRQ(ierr);
3288   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3289   ii   = 0;
3290   for (i=0; i<m; i++) {
3291     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3292     for (j=0; j<dof; j++) {
3293       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3294       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3295       ii++;
3296     }
3297     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3298   }
3299   ierr = PetscFree(icols);CHKERRQ(ierr);
3300   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3302 
3303   if (reuse == MAT_REUSE_MATRIX) {
3304     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3305   } else {
3306     *newmat = B;
3307   }
3308   PetscFunctionReturn(0);
3309 }
3310 
3311 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3312 
3313 #undef __FUNCT__
3314 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3315 PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3316 {
3317   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3318   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3319   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3320   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3321   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3322   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3323   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3324   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3325   PetscInt       rstart,cstart,*garray,ii,k;
3326   PetscErrorCode ierr;
3327   PetscScalar    *vals,*ovals;
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3331   for (i=0; i<A->rmap->n/dof; i++) {
3332     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3333     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3334     for (j=0; j<dof; j++) {
3335       dnz[dof*i+j] = AIJ->ilen[i];
3336       onz[dof*i+j] = OAIJ->ilen[i];
3337     }
3338   }
3339   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3340   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3341   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3342 
3343   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3344   rstart = dof*maij->A->rmap->rstart;
3345   cstart = dof*maij->A->cmap->rstart;
3346   garray = mpiaij->garray;
3347 
3348   ii = rstart;
3349   for (i=0; i<A->rmap->n/dof; i++) {
3350     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3351     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3352     for (j=0; j<dof; j++) {
3353       for (k=0; k<ncols; k++) {
3354         icols[k] = cstart + dof*cols[k]+j;
3355       }
3356       for (k=0; k<oncols; k++) {
3357         oicols[k] = dof*garray[ocols[k]]+j;
3358       }
3359       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3360       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3361       ii++;
3362     }
3363     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3364     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3365   }
3366   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3367 
3368   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3369   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3370 
3371   if (reuse == MAT_REUSE_MATRIX) {
3372     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3373     ((PetscObject)A)->refct = 1;
3374 
3375     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3376 
3377     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3378   } else {
3379     *newmat = B;
3380   }
3381   PetscFunctionReturn(0);
3382 }
3383 
3384 #undef __FUNCT__
3385 #define __FUNCT__ "MatGetSubMatrix_MAIJ"
3386 PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3387 {
3388   PetscErrorCode ierr;
3389   Mat            A;
3390 
3391   PetscFunctionBegin;
3392   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3393   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3394   ierr = MatDestroy(&A);CHKERRQ(ierr);
3395   PetscFunctionReturn(0);
3396 }
3397 
3398 /* ---------------------------------------------------------------------------------- */
3399 #undef __FUNCT__
3400 #define __FUNCT__ "MatCreateMAIJ"
3401 /*@C
3402   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3403   operations for multicomponent problems.  It interpolates each component the same
3404   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3405   and MATMPIAIJ for distributed matrices.
3406 
3407   Collective
3408 
3409   Input Parameters:
3410 + A - the AIJ matrix describing the action on blocks
3411 - dof - the block size (number of components per node)
3412 
3413   Output Parameter:
3414 . maij - the new MAIJ matrix
3415 
3416   Operations provided:
3417 + MatMult
3418 . MatMultTranspose
3419 . MatMultAdd
3420 . MatMultTransposeAdd
3421 - MatView
3422 
3423   Level: advanced
3424 
3425 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3426 @*/
3427 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3428 {
3429   PetscErrorCode ierr;
3430   PetscMPIInt    size;
3431   PetscInt       n;
3432   Mat            B;
3433 
3434   PetscFunctionBegin;
3435   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3436 
3437   if (dof == 1) *maij = A;
3438   else {
3439     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3440     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3441     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3442     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3443     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3444     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3445 
3446     B->assembled = PETSC_TRUE;
3447 
3448     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3449     if (size == 1) {
3450       Mat_SeqMAIJ *b;
3451 
3452       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3453 
3454       B->ops->setup   = NULL;
3455       B->ops->destroy = MatDestroy_SeqMAIJ;
3456       B->ops->view    = MatView_SeqMAIJ;
3457       b               = (Mat_SeqMAIJ*)B->data;
3458       b->dof          = dof;
3459       b->AIJ          = A;
3460 
3461       if (dof == 2) {
3462         B->ops->mult             = MatMult_SeqMAIJ_2;
3463         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3464         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3465         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3466       } else if (dof == 3) {
3467         B->ops->mult             = MatMult_SeqMAIJ_3;
3468         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3469         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3470         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3471       } else if (dof == 4) {
3472         B->ops->mult             = MatMult_SeqMAIJ_4;
3473         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3474         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3475         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3476       } else if (dof == 5) {
3477         B->ops->mult             = MatMult_SeqMAIJ_5;
3478         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3479         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3480         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3481       } else if (dof == 6) {
3482         B->ops->mult             = MatMult_SeqMAIJ_6;
3483         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3484         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3485         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3486       } else if (dof == 7) {
3487         B->ops->mult             = MatMult_SeqMAIJ_7;
3488         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3489         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3490         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3491       } else if (dof == 8) {
3492         B->ops->mult             = MatMult_SeqMAIJ_8;
3493         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3494         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3495         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3496       } else if (dof == 9) {
3497         B->ops->mult             = MatMult_SeqMAIJ_9;
3498         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3499         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3500         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3501       } else if (dof == 10) {
3502         B->ops->mult             = MatMult_SeqMAIJ_10;
3503         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3504         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3505         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3506       } else if (dof == 11) {
3507         B->ops->mult             = MatMult_SeqMAIJ_11;
3508         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3509         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3510         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3511       } else if (dof == 16) {
3512         B->ops->mult             = MatMult_SeqMAIJ_16;
3513         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3514         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3515         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3516       } else if (dof == 18) {
3517         B->ops->mult             = MatMult_SeqMAIJ_18;
3518         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3519         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3520         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3521       } else {
3522         B->ops->mult             = MatMult_SeqMAIJ_N;
3523         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3524         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3525         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3526       }
3527       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3528       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3529     } else {
3530       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3531       Mat_MPIMAIJ *b;
3532       IS          from,to;
3533       Vec         gvec;
3534 
3535       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3536 
3537       B->ops->setup   = NULL;
3538       B->ops->destroy = MatDestroy_MPIMAIJ;
3539       B->ops->view    = MatView_MPIMAIJ;
3540 
3541       b      = (Mat_MPIMAIJ*)B->data;
3542       b->dof = dof;
3543       b->A   = A;
3544 
3545       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3546       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3547 
3548       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3549       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3550       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3551       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3552       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3553 
3554       /* create two temporary Index sets for build scatter gather */
3555       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3556       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3557 
3558       /* create temporary global vector to generate scatter context */
3559       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3560 
3561       /* generate the scatter context */
3562       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3563 
3564       ierr = ISDestroy(&from);CHKERRQ(ierr);
3565       ierr = ISDestroy(&to);CHKERRQ(ierr);
3566       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3567 
3568       B->ops->mult             = MatMult_MPIMAIJ_dof;
3569       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3570       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3571       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3572 
3573       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3574       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3575     }
3576     B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3577     ierr  = MatSetUp(B);CHKERRQ(ierr);
3578     *maij = B;
3579     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3580   }
3581   PetscFunctionReturn(0);
3582 }
3583