xref: /petsc/src/mat/impls/maij/maij.c (revision 5ef145eb8df80296d879d7eba3b3c32bcd261bc3)
1 /*
2     Defines the basic matrix operations for the MAIJ  matrix storage format.
3   This format is used for restriction and interpolation operations for
4   multicomponent problems. It interpolates each component the same way
5   independently.
6 
7      We provide:
8          MatMult()
9          MatMultTranspose()
10          MatMultTransposeAdd()
11          MatMultAdd()
12           and
13          MatCreateMAIJ(Mat,dof,Mat*)
14 
15      This single directory handles both the sequential and parallel codes
16 */
17 
18 #include "src/mat/impls/maij/maij.h"
19 #include "vecimpl.h"
20 
21 #undef __FUNCT__
22 #define __FUNCT__ "MatMAIJGetAIJ"
23 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
24 {
25   PetscErrorCode ierr;
26   PetscTruth     ismpimaij,isseqmaij;
27 
28   PetscFunctionBegin;
29   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
30   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
31   if (ismpimaij) {
32     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
33 
34     *B = b->A;
35   } else if (isseqmaij) {
36     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
37 
38     *B = b->AIJ;
39   } else {
40     *B = A;
41   }
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatMAIJRedimension"
47 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
48 {
49   PetscErrorCode ierr;
50   Mat            Aij;
51 
52   PetscFunctionBegin;
53   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
54   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDestroy_SeqMAIJ"
60 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
61 {
62   PetscErrorCode ierr;
63   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
64 
65   PetscFunctionBegin;
66   if (b->AIJ) {
67     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
68   }
69   ierr = PetscFree(b);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatDestroy_MPIMAIJ"
75 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
76 {
77   PetscErrorCode ierr;
78   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
79 
80   PetscFunctionBegin;
81   if (b->AIJ) {
82     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
83   }
84   if (b->OAIJ) {
85     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
86   }
87   if (b->A) {
88     ierr = MatDestroy(b->A);CHKERRQ(ierr);
89   }
90   if (b->ctx) {
91     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
92   }
93   if (b->w) {
94     ierr = VecDestroy(b->w);CHKERRQ(ierr);
95   }
96   ierr = PetscFree(b);CHKERRQ(ierr);
97   PetscFunctionReturn(0);
98 }
99 
100 /*MC
101   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
102   multicomponent problems, interpolating or restricting each component the same way independently.
103   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
104 
105   Operations provided:
106 . MatMult
107 . MatMultTranspose
108 . MatMultAdd
109 . MatMultTransposeAdd
110 
111   Level: advanced
112 
113 .seealso: MatCreateSeqDense
114 M*/
115 
116 EXTERN_C_BEGIN
117 #undef __FUNCT__
118 #define __FUNCT__ "MatCreate_MAIJ"
119 PetscErrorCode MatCreate_MAIJ(Mat A)
120 {
121   PetscErrorCode ierr;
122   Mat_MPIMAIJ    *b;
123 
124   PetscFunctionBegin;
125   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
126   A->data  = (void*)b;
127   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
128   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
129   A->factor           = 0;
130   A->mapping          = 0;
131 
132   b->AIJ  = 0;
133   b->dof  = 0;
134   b->OAIJ = 0;
135   b->ctx  = 0;
136   b->w    = 0;
137   PetscFunctionReturn(0);
138 }
139 EXTERN_C_END
140 
141 /* --------------------------------------------------------------------------------------*/
142 #undef __FUNCT__
143 #define __FUNCT__ "MatMult_SeqMAIJ_2"
144 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
145 {
146   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
147   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
148   PetscScalar    *x,*y,*v,sum1, sum2;
149   PetscErrorCode ierr;
150   PetscInt       m = b->AIJ->m,*idx,*ii;
151   PetscInt       n,i,jrow,j;
152 
153   PetscFunctionBegin;
154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
155   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
156   idx  = a->j;
157   v    = a->a;
158   ii   = a->i;
159 
160   for (i=0; i<m; i++) {
161     jrow = ii[i];
162     n    = ii[i+1] - jrow;
163     sum1  = 0.0;
164     sum2  = 0.0;
165     for (j=0; j<n; j++) {
166       sum1 += v[jrow]*x[2*idx[jrow]];
167       sum2 += v[jrow]*x[2*idx[jrow]+1];
168       jrow++;
169      }
170     y[2*i]   = sum1;
171     y[2*i+1] = sum2;
172   }
173 
174   PetscLogFlops(4*a->nz - 2*m);
175   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
176   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
182 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183 {
184   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
187   PetscErrorCode ierr;
188   PetscInt       m = b->AIJ->m,n,i,*idx;
189 
190   PetscFunctionBegin;
191   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
193   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
194 
195   for (i=0; i<m; i++) {
196     idx    = a->j + a->i[i] ;
197     v      = a->a + a->i[i] ;
198     n      = a->i[i+1] - a->i[i];
199     alpha1 = x[2*i];
200     alpha2 = x[2*i+1];
201     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
202   }
203   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
204   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
205   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNCT__
210 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
211 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
212 {
213   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
214   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
215   PetscScalar    *x,*y,*v,sum1, sum2;
216   PetscErrorCode ierr;
217   PetscInt       m = b->AIJ->m,*idx,*ii;
218   PetscInt       n,i,jrow,j;
219 
220   PetscFunctionBegin;
221   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
222   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
223   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
224   idx  = a->j;
225   v    = a->a;
226   ii   = a->i;
227 
228   for (i=0; i<m; i++) {
229     jrow = ii[i];
230     n    = ii[i+1] - jrow;
231     sum1  = 0.0;
232     sum2  = 0.0;
233     for (j=0; j<n; j++) {
234       sum1 += v[jrow]*x[2*idx[jrow]];
235       sum2 += v[jrow]*x[2*idx[jrow]+1];
236       jrow++;
237      }
238     y[2*i]   += sum1;
239     y[2*i+1] += sum2;
240   }
241 
242   PetscLogFlops(4*a->nz - 2*m);
243   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 #undef __FUNCT__
248 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
249 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250 {
251   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253   PetscScalar    *x,*y,*v,alpha1,alpha2;
254   PetscErrorCode ierr;
255   PetscInt       m = b->AIJ->m,n,i,*idx;
256 
257   PetscFunctionBegin;
258   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
259   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
260   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
261 
262   for (i=0; i<m; i++) {
263     idx   = a->j + a->i[i] ;
264     v     = a->a + a->i[i] ;
265     n     = a->i[i+1] - a->i[i];
266     alpha1 = x[2*i];
267     alpha2 = x[2*i+1];
268     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
269   }
270   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
271   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
272   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 /* --------------------------------------------------------------------------------------*/
276 #undef __FUNCT__
277 #define __FUNCT__ "MatMult_SeqMAIJ_3"
278 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
279 {
280   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
281   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
282   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
283   PetscErrorCode ierr;
284   PetscInt       m = b->AIJ->m,*idx,*ii;
285   PetscInt       n,i,jrow,j;
286 
287   PetscFunctionBegin;
288   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
289   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
290   idx  = a->j;
291   v    = a->a;
292   ii   = a->i;
293 
294   for (i=0; i<m; i++) {
295     jrow = ii[i];
296     n    = ii[i+1] - jrow;
297     sum1  = 0.0;
298     sum2  = 0.0;
299     sum3  = 0.0;
300     for (j=0; j<n; j++) {
301       sum1 += v[jrow]*x[3*idx[jrow]];
302       sum2 += v[jrow]*x[3*idx[jrow]+1];
303       sum3 += v[jrow]*x[3*idx[jrow]+2];
304       jrow++;
305      }
306     y[3*i]   = sum1;
307     y[3*i+1] = sum2;
308     y[3*i+2] = sum3;
309   }
310 
311   PetscLogFlops(6*a->nz - 3*m);
312   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
313   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
319 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
320 {
321   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
322   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
323   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
324   PetscErrorCode ierr;
325   PetscInt       m = b->AIJ->m,n,i,*idx;
326 
327   PetscFunctionBegin;
328   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
329   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
330   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
331 
332   for (i=0; i<m; i++) {
333     idx    = a->j + a->i[i];
334     v      = a->a + a->i[i];
335     n      = a->i[i+1] - a->i[i];
336     alpha1 = x[3*i];
337     alpha2 = x[3*i+1];
338     alpha3 = x[3*i+2];
339     while (n-->0) {
340       y[3*(*idx)]   += alpha1*(*v);
341       y[3*(*idx)+1] += alpha2*(*v);
342       y[3*(*idx)+2] += alpha3*(*v);
343       idx++; v++;
344     }
345   }
346   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
347   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
354 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
355 {
356   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
357   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
358   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
359   PetscErrorCode ierr;
360   PetscInt       m = b->AIJ->m,*idx,*ii;
361   PetscInt       n,i,jrow,j;
362 
363   PetscFunctionBegin;
364   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
365   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
366   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
367   idx  = a->j;
368   v    = a->a;
369   ii   = a->i;
370 
371   for (i=0; i<m; i++) {
372     jrow = ii[i];
373     n    = ii[i+1] - jrow;
374     sum1  = 0.0;
375     sum2  = 0.0;
376     sum3  = 0.0;
377     for (j=0; j<n; j++) {
378       sum1 += v[jrow]*x[3*idx[jrow]];
379       sum2 += v[jrow]*x[3*idx[jrow]+1];
380       sum3 += v[jrow]*x[3*idx[jrow]+2];
381       jrow++;
382      }
383     y[3*i]   += sum1;
384     y[3*i+1] += sum2;
385     y[3*i+2] += sum3;
386   }
387 
388   PetscLogFlops(6*a->nz);
389   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
390   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
391   PetscFunctionReturn(0);
392 }
393 #undef __FUNCT__
394 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
395 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
396 {
397   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
398   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
399   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
400   PetscErrorCode ierr;
401   PetscInt       m = b->AIJ->m,n,i,*idx;
402 
403   PetscFunctionBegin;
404   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
405   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
406   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
407   for (i=0; i<m; i++) {
408     idx    = a->j + a->i[i] ;
409     v      = a->a + a->i[i] ;
410     n      = a->i[i+1] - a->i[i];
411     alpha1 = x[3*i];
412     alpha2 = x[3*i+1];
413     alpha3 = x[3*i+2];
414     while (n-->0) {
415       y[3*(*idx)]   += alpha1*(*v);
416       y[3*(*idx)+1] += alpha2*(*v);
417       y[3*(*idx)+2] += alpha3*(*v);
418       idx++; v++;
419     }
420   }
421   PetscLogFlops(6*a->nz);
422   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
423   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 /* ------------------------------------------------------------------------------*/
428 #undef __FUNCT__
429 #define __FUNCT__ "MatMult_SeqMAIJ_4"
430 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
431 {
432   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
433   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
434   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
435   PetscErrorCode ierr;
436   PetscInt       m = b->AIJ->m,*idx,*ii;
437   PetscInt       n,i,jrow,j;
438 
439   PetscFunctionBegin;
440   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
441   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
442   idx  = a->j;
443   v    = a->a;
444   ii   = a->i;
445 
446   for (i=0; i<m; i++) {
447     jrow = ii[i];
448     n    = ii[i+1] - jrow;
449     sum1  = 0.0;
450     sum2  = 0.0;
451     sum3  = 0.0;
452     sum4  = 0.0;
453     for (j=0; j<n; j++) {
454       sum1 += v[jrow]*x[4*idx[jrow]];
455       sum2 += v[jrow]*x[4*idx[jrow]+1];
456       sum3 += v[jrow]*x[4*idx[jrow]+2];
457       sum4 += v[jrow]*x[4*idx[jrow]+3];
458       jrow++;
459      }
460     y[4*i]   = sum1;
461     y[4*i+1] = sum2;
462     y[4*i+2] = sum3;
463     y[4*i+3] = sum4;
464   }
465 
466   PetscLogFlops(8*a->nz - 4*m);
467   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
474 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
475 {
476   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
477   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
478   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
479   PetscErrorCode ierr;
480   PetscInt       m = b->AIJ->m,n,i,*idx;
481 
482   PetscFunctionBegin;
483   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
484   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
485   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
486   for (i=0; i<m; i++) {
487     idx    = a->j + a->i[i] ;
488     v      = a->a + a->i[i] ;
489     n      = a->i[i+1] - a->i[i];
490     alpha1 = x[4*i];
491     alpha2 = x[4*i+1];
492     alpha3 = x[4*i+2];
493     alpha4 = x[4*i+3];
494     while (n-->0) {
495       y[4*(*idx)]   += alpha1*(*v);
496       y[4*(*idx)+1] += alpha2*(*v);
497       y[4*(*idx)+2] += alpha3*(*v);
498       y[4*(*idx)+3] += alpha4*(*v);
499       idx++; v++;
500     }
501   }
502   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
503   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
504   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
510 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
513   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
514   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
515   PetscErrorCode ierr;
516   PetscInt       m = b->AIJ->m,*idx,*ii;
517   PetscInt       n,i,jrow,j;
518 
519   PetscFunctionBegin;
520   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
521   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
522   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
523   idx  = a->j;
524   v    = a->a;
525   ii   = a->i;
526 
527   for (i=0; i<m; i++) {
528     jrow = ii[i];
529     n    = ii[i+1] - jrow;
530     sum1  = 0.0;
531     sum2  = 0.0;
532     sum3  = 0.0;
533     sum4  = 0.0;
534     for (j=0; j<n; j++) {
535       sum1 += v[jrow]*x[4*idx[jrow]];
536       sum2 += v[jrow]*x[4*idx[jrow]+1];
537       sum3 += v[jrow]*x[4*idx[jrow]+2];
538       sum4 += v[jrow]*x[4*idx[jrow]+3];
539       jrow++;
540      }
541     y[4*i]   += sum1;
542     y[4*i+1] += sum2;
543     y[4*i+2] += sum3;
544     y[4*i+3] += sum4;
545   }
546 
547   PetscLogFlops(8*a->nz - 4*m);
548   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
549   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 #undef __FUNCT__
553 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
554 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
555 {
556   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
557   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
558   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
559   PetscErrorCode ierr;
560   PetscInt       m = b->AIJ->m,n,i,*idx;
561 
562   PetscFunctionBegin;
563   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
564   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
565   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
566 
567   for (i=0; i<m; i++) {
568     idx    = a->j + a->i[i] ;
569     v      = a->a + a->i[i] ;
570     n      = a->i[i+1] - a->i[i];
571     alpha1 = x[4*i];
572     alpha2 = x[4*i+1];
573     alpha3 = x[4*i+2];
574     alpha4 = x[4*i+3];
575     while (n-->0) {
576       y[4*(*idx)]   += alpha1*(*v);
577       y[4*(*idx)+1] += alpha2*(*v);
578       y[4*(*idx)+2] += alpha3*(*v);
579       y[4*(*idx)+3] += alpha4*(*v);
580       idx++; v++;
581     }
582   }
583   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
584   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
585   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 /* ------------------------------------------------------------------------------*/
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMult_SeqMAIJ_5"
592 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
593 {
594   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
595   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
596   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
597   PetscErrorCode ierr;
598   PetscInt       m = b->AIJ->m,*idx,*ii;
599   PetscInt       n,i,jrow,j;
600 
601   PetscFunctionBegin;
602   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
603   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
604   idx  = a->j;
605   v    = a->a;
606   ii   = a->i;
607 
608   for (i=0; i<m; i++) {
609     jrow = ii[i];
610     n    = ii[i+1] - jrow;
611     sum1  = 0.0;
612     sum2  = 0.0;
613     sum3  = 0.0;
614     sum4  = 0.0;
615     sum5  = 0.0;
616     for (j=0; j<n; j++) {
617       sum1 += v[jrow]*x[5*idx[jrow]];
618       sum2 += v[jrow]*x[5*idx[jrow]+1];
619       sum3 += v[jrow]*x[5*idx[jrow]+2];
620       sum4 += v[jrow]*x[5*idx[jrow]+3];
621       sum5 += v[jrow]*x[5*idx[jrow]+4];
622       jrow++;
623      }
624     y[5*i]   = sum1;
625     y[5*i+1] = sum2;
626     y[5*i+2] = sum3;
627     y[5*i+3] = sum4;
628     y[5*i+4] = sum5;
629   }
630 
631   PetscLogFlops(10*a->nz - 5*m);
632   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
633   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
634   PetscFunctionReturn(0);
635 }
636 
637 #undef __FUNCT__
638 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
639 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
640 {
641   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
642   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
643   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
644   PetscErrorCode ierr;
645   PetscInt       m = b->AIJ->m,n,i,*idx;
646 
647   PetscFunctionBegin;
648   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
649   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
650   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
651 
652   for (i=0; i<m; i++) {
653     idx    = a->j + a->i[i] ;
654     v      = a->a + a->i[i] ;
655     n      = a->i[i+1] - a->i[i];
656     alpha1 = x[5*i];
657     alpha2 = x[5*i+1];
658     alpha3 = x[5*i+2];
659     alpha4 = x[5*i+3];
660     alpha5 = x[5*i+4];
661     while (n-->0) {
662       y[5*(*idx)]   += alpha1*(*v);
663       y[5*(*idx)+1] += alpha2*(*v);
664       y[5*(*idx)+2] += alpha3*(*v);
665       y[5*(*idx)+3] += alpha4*(*v);
666       y[5*(*idx)+4] += alpha5*(*v);
667       idx++; v++;
668     }
669   }
670   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
678 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
679 {
680   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
681   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
682   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
683   PetscErrorCode ierr;
684   PetscInt       m = b->AIJ->m,*idx,*ii;
685   PetscInt       n,i,jrow,j;
686 
687   PetscFunctionBegin;
688   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
689   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
690   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
691   idx  = a->j;
692   v    = a->a;
693   ii   = a->i;
694 
695   for (i=0; i<m; i++) {
696     jrow = ii[i];
697     n    = ii[i+1] - jrow;
698     sum1  = 0.0;
699     sum2  = 0.0;
700     sum3  = 0.0;
701     sum4  = 0.0;
702     sum5  = 0.0;
703     for (j=0; j<n; j++) {
704       sum1 += v[jrow]*x[5*idx[jrow]];
705       sum2 += v[jrow]*x[5*idx[jrow]+1];
706       sum3 += v[jrow]*x[5*idx[jrow]+2];
707       sum4 += v[jrow]*x[5*idx[jrow]+3];
708       sum5 += v[jrow]*x[5*idx[jrow]+4];
709       jrow++;
710      }
711     y[5*i]   += sum1;
712     y[5*i+1] += sum2;
713     y[5*i+2] += sum3;
714     y[5*i+3] += sum4;
715     y[5*i+4] += sum5;
716   }
717 
718   PetscLogFlops(10*a->nz);
719   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
720   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
726 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
727 {
728   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
729   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
730   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
731   PetscErrorCode ierr;
732   PetscInt       m = b->AIJ->m,n,i,*idx;
733 
734   PetscFunctionBegin;
735   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
736   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
737   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
738 
739   for (i=0; i<m; i++) {
740     idx    = a->j + a->i[i] ;
741     v      = a->a + a->i[i] ;
742     n      = a->i[i+1] - a->i[i];
743     alpha1 = x[5*i];
744     alpha2 = x[5*i+1];
745     alpha3 = x[5*i+2];
746     alpha4 = x[5*i+3];
747     alpha5 = x[5*i+4];
748     while (n-->0) {
749       y[5*(*idx)]   += alpha1*(*v);
750       y[5*(*idx)+1] += alpha2*(*v);
751       y[5*(*idx)+2] += alpha3*(*v);
752       y[5*(*idx)+3] += alpha4*(*v);
753       y[5*(*idx)+4] += alpha5*(*v);
754       idx++; v++;
755     }
756   }
757   PetscLogFlops(10*a->nz);
758   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
759   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 /* ------------------------------------------------------------------------------*/
764 #undef __FUNCT__
765 #define __FUNCT__ "MatMult_SeqMAIJ_6"
766 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
767 {
768   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
769   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
770   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
771   PetscErrorCode ierr;
772   PetscInt       m = b->AIJ->m,*idx,*ii;
773   PetscInt       n,i,jrow,j;
774 
775   PetscFunctionBegin;
776   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
777   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
778   idx  = a->j;
779   v    = a->a;
780   ii   = a->i;
781 
782   for (i=0; i<m; i++) {
783     jrow = ii[i];
784     n    = ii[i+1] - jrow;
785     sum1  = 0.0;
786     sum2  = 0.0;
787     sum3  = 0.0;
788     sum4  = 0.0;
789     sum5  = 0.0;
790     sum6  = 0.0;
791     for (j=0; j<n; j++) {
792       sum1 += v[jrow]*x[6*idx[jrow]];
793       sum2 += v[jrow]*x[6*idx[jrow]+1];
794       sum3 += v[jrow]*x[6*idx[jrow]+2];
795       sum4 += v[jrow]*x[6*idx[jrow]+3];
796       sum5 += v[jrow]*x[6*idx[jrow]+4];
797       sum6 += v[jrow]*x[6*idx[jrow]+5];
798       jrow++;
799      }
800     y[6*i]   = sum1;
801     y[6*i+1] = sum2;
802     y[6*i+2] = sum3;
803     y[6*i+3] = sum4;
804     y[6*i+4] = sum5;
805     y[6*i+5] = sum6;
806   }
807 
808   PetscLogFlops(12*a->nz - 6*m);
809   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
810   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
811   PetscFunctionReturn(0);
812 }
813 
814 #undef __FUNCT__
815 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
816 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
817 {
818   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
819   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
820   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
821   PetscErrorCode ierr;
822   PetscInt       m = b->AIJ->m,n,i,*idx;
823 
824   PetscFunctionBegin;
825   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
826   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
827   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
828 
829   for (i=0; i<m; i++) {
830     idx    = a->j + a->i[i] ;
831     v      = a->a + a->i[i] ;
832     n      = a->i[i+1] - a->i[i];
833     alpha1 = x[6*i];
834     alpha2 = x[6*i+1];
835     alpha3 = x[6*i+2];
836     alpha4 = x[6*i+3];
837     alpha5 = x[6*i+4];
838     alpha6 = x[6*i+5];
839     while (n-->0) {
840       y[6*(*idx)]   += alpha1*(*v);
841       y[6*(*idx)+1] += alpha2*(*v);
842       y[6*(*idx)+2] += alpha3*(*v);
843       y[6*(*idx)+3] += alpha4*(*v);
844       y[6*(*idx)+4] += alpha5*(*v);
845       y[6*(*idx)+5] += alpha6*(*v);
846       idx++; v++;
847     }
848   }
849   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
850   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
851   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
857 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
858 {
859   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
860   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
861   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
862   PetscErrorCode ierr;
863   PetscInt       m = b->AIJ->m,*idx,*ii;
864   PetscInt       n,i,jrow,j;
865 
866   PetscFunctionBegin;
867   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
868   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
869   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
870   idx  = a->j;
871   v    = a->a;
872   ii   = a->i;
873 
874   for (i=0; i<m; i++) {
875     jrow = ii[i];
876     n    = ii[i+1] - jrow;
877     sum1  = 0.0;
878     sum2  = 0.0;
879     sum3  = 0.0;
880     sum4  = 0.0;
881     sum5  = 0.0;
882     sum6  = 0.0;
883     for (j=0; j<n; j++) {
884       sum1 += v[jrow]*x[6*idx[jrow]];
885       sum2 += v[jrow]*x[6*idx[jrow]+1];
886       sum3 += v[jrow]*x[6*idx[jrow]+2];
887       sum4 += v[jrow]*x[6*idx[jrow]+3];
888       sum5 += v[jrow]*x[6*idx[jrow]+4];
889       sum6 += v[jrow]*x[6*idx[jrow]+5];
890       jrow++;
891      }
892     y[6*i]   += sum1;
893     y[6*i+1] += sum2;
894     y[6*i+2] += sum3;
895     y[6*i+3] += sum4;
896     y[6*i+4] += sum5;
897     y[6*i+5] += sum6;
898   }
899 
900   PetscLogFlops(12*a->nz);
901   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
902   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 #undef __FUNCT__
907 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
908 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
909 {
910   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
911   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
912   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
913   PetscErrorCode ierr;
914   PetscInt       m = b->AIJ->m,n,i,*idx;
915 
916   PetscFunctionBegin;
917   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
918   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
919   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
920 
921   for (i=0; i<m; i++) {
922     idx    = a->j + a->i[i] ;
923     v      = a->a + a->i[i] ;
924     n      = a->i[i+1] - a->i[i];
925     alpha1 = x[6*i];
926     alpha2 = x[6*i+1];
927     alpha3 = x[6*i+2];
928     alpha4 = x[6*i+3];
929     alpha5 = x[6*i+4];
930     alpha6 = x[6*i+5];
931     while (n-->0) {
932       y[6*(*idx)]   += alpha1*(*v);
933       y[6*(*idx)+1] += alpha2*(*v);
934       y[6*(*idx)+2] += alpha3*(*v);
935       y[6*(*idx)+3] += alpha4*(*v);
936       y[6*(*idx)+4] += alpha5*(*v);
937       y[6*(*idx)+5] += alpha6*(*v);
938       idx++; v++;
939     }
940   }
941   PetscLogFlops(12*a->nz);
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 /* ------------------------------------------------------------------------------*/
948 #undef __FUNCT__
949 #define __FUNCT__ "MatMult_SeqMAIJ_8"
950 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
951 {
952   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
953   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
954   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
955   PetscErrorCode ierr;
956   PetscInt       m = b->AIJ->m,*idx,*ii;
957   PetscInt       n,i,jrow,j;
958 
959   PetscFunctionBegin;
960   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
961   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
962   idx  = a->j;
963   v    = a->a;
964   ii   = a->i;
965 
966   for (i=0; i<m; i++) {
967     jrow = ii[i];
968     n    = ii[i+1] - jrow;
969     sum1  = 0.0;
970     sum2  = 0.0;
971     sum3  = 0.0;
972     sum4  = 0.0;
973     sum5  = 0.0;
974     sum6  = 0.0;
975     sum7  = 0.0;
976     sum8  = 0.0;
977     for (j=0; j<n; j++) {
978       sum1 += v[jrow]*x[8*idx[jrow]];
979       sum2 += v[jrow]*x[8*idx[jrow]+1];
980       sum3 += v[jrow]*x[8*idx[jrow]+2];
981       sum4 += v[jrow]*x[8*idx[jrow]+3];
982       sum5 += v[jrow]*x[8*idx[jrow]+4];
983       sum6 += v[jrow]*x[8*idx[jrow]+5];
984       sum7 += v[jrow]*x[8*idx[jrow]+6];
985       sum8 += v[jrow]*x[8*idx[jrow]+7];
986       jrow++;
987      }
988     y[8*i]   = sum1;
989     y[8*i+1] = sum2;
990     y[8*i+2] = sum3;
991     y[8*i+3] = sum4;
992     y[8*i+4] = sum5;
993     y[8*i+5] = sum6;
994     y[8*i+6] = sum7;
995     y[8*i+7] = sum8;
996   }
997 
998   PetscLogFlops(16*a->nz - 8*m);
999   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1000   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1006 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1007 {
1008   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1009   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1010   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1011   PetscErrorCode ierr;
1012   PetscInt       m = b->AIJ->m,n,i,*idx;
1013 
1014   PetscFunctionBegin;
1015   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1016   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1017   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1018 
1019   for (i=0; i<m; i++) {
1020     idx    = a->j + a->i[i] ;
1021     v      = a->a + a->i[i] ;
1022     n      = a->i[i+1] - a->i[i];
1023     alpha1 = x[8*i];
1024     alpha2 = x[8*i+1];
1025     alpha3 = x[8*i+2];
1026     alpha4 = x[8*i+3];
1027     alpha5 = x[8*i+4];
1028     alpha6 = x[8*i+5];
1029     alpha7 = x[8*i+6];
1030     alpha8 = x[8*i+7];
1031     while (n-->0) {
1032       y[8*(*idx)]   += alpha1*(*v);
1033       y[8*(*idx)+1] += alpha2*(*v);
1034       y[8*(*idx)+2] += alpha3*(*v);
1035       y[8*(*idx)+3] += alpha4*(*v);
1036       y[8*(*idx)+4] += alpha5*(*v);
1037       y[8*(*idx)+5] += alpha6*(*v);
1038       y[8*(*idx)+6] += alpha7*(*v);
1039       y[8*(*idx)+7] += alpha8*(*v);
1040       idx++; v++;
1041     }
1042   }
1043   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1044   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1045   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1051 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1052 {
1053   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1054   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1055   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1056   PetscErrorCode ierr;
1057   PetscInt       m = b->AIJ->m,*idx,*ii;
1058   PetscInt       n,i,jrow,j;
1059 
1060   PetscFunctionBegin;
1061   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1062   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1063   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1064   idx  = a->j;
1065   v    = a->a;
1066   ii   = a->i;
1067 
1068   for (i=0; i<m; i++) {
1069     jrow = ii[i];
1070     n    = ii[i+1] - jrow;
1071     sum1  = 0.0;
1072     sum2  = 0.0;
1073     sum3  = 0.0;
1074     sum4  = 0.0;
1075     sum5  = 0.0;
1076     sum6  = 0.0;
1077     sum7  = 0.0;
1078     sum8  = 0.0;
1079     for (j=0; j<n; j++) {
1080       sum1 += v[jrow]*x[8*idx[jrow]];
1081       sum2 += v[jrow]*x[8*idx[jrow]+1];
1082       sum3 += v[jrow]*x[8*idx[jrow]+2];
1083       sum4 += v[jrow]*x[8*idx[jrow]+3];
1084       sum5 += v[jrow]*x[8*idx[jrow]+4];
1085       sum6 += v[jrow]*x[8*idx[jrow]+5];
1086       sum7 += v[jrow]*x[8*idx[jrow]+6];
1087       sum8 += v[jrow]*x[8*idx[jrow]+7];
1088       jrow++;
1089      }
1090     y[8*i]   += sum1;
1091     y[8*i+1] += sum2;
1092     y[8*i+2] += sum3;
1093     y[8*i+3] += sum4;
1094     y[8*i+4] += sum5;
1095     y[8*i+5] += sum6;
1096     y[8*i+6] += sum7;
1097     y[8*i+7] += sum8;
1098   }
1099 
1100   PetscLogFlops(16*a->nz);
1101   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1102   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1108 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1111   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1112   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1113   PetscErrorCode ierr;
1114   PetscInt       m = b->AIJ->m,n,i,*idx;
1115 
1116   PetscFunctionBegin;
1117   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1118   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1119   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1120   for (i=0; i<m; i++) {
1121     idx    = a->j + a->i[i] ;
1122     v      = a->a + a->i[i] ;
1123     n      = a->i[i+1] - a->i[i];
1124     alpha1 = x[8*i];
1125     alpha2 = x[8*i+1];
1126     alpha3 = x[8*i+2];
1127     alpha4 = x[8*i+3];
1128     alpha5 = x[8*i+4];
1129     alpha6 = x[8*i+5];
1130     alpha7 = x[8*i+6];
1131     alpha8 = x[8*i+7];
1132     while (n-->0) {
1133       y[8*(*idx)]   += alpha1*(*v);
1134       y[8*(*idx)+1] += alpha2*(*v);
1135       y[8*(*idx)+2] += alpha3*(*v);
1136       y[8*(*idx)+3] += alpha4*(*v);
1137       y[8*(*idx)+4] += alpha5*(*v);
1138       y[8*(*idx)+5] += alpha6*(*v);
1139       y[8*(*idx)+6] += alpha7*(*v);
1140       y[8*(*idx)+7] += alpha8*(*v);
1141       idx++; v++;
1142     }
1143   }
1144   PetscLogFlops(16*a->nz);
1145   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1146   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 /* ------------------------------------------------------------------------------*/
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1153 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1154 {
1155   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1156   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1157   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1158   PetscErrorCode ierr;
1159   PetscInt       m = b->AIJ->m,*idx,*ii;
1160   PetscInt       n,i,jrow,j;
1161 
1162   PetscFunctionBegin;
1163   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1164   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1165   idx  = a->j;
1166   v    = a->a;
1167   ii   = a->i;
1168 
1169   for (i=0; i<m; i++) {
1170     jrow = ii[i];
1171     n    = ii[i+1] - jrow;
1172     sum1  = 0.0;
1173     sum2  = 0.0;
1174     sum3  = 0.0;
1175     sum4  = 0.0;
1176     sum5  = 0.0;
1177     sum6  = 0.0;
1178     sum7  = 0.0;
1179     sum8  = 0.0;
1180     sum9  = 0.0;
1181     for (j=0; j<n; j++) {
1182       sum1 += v[jrow]*x[9*idx[jrow]];
1183       sum2 += v[jrow]*x[9*idx[jrow]+1];
1184       sum3 += v[jrow]*x[9*idx[jrow]+2];
1185       sum4 += v[jrow]*x[9*idx[jrow]+3];
1186       sum5 += v[jrow]*x[9*idx[jrow]+4];
1187       sum6 += v[jrow]*x[9*idx[jrow]+5];
1188       sum7 += v[jrow]*x[9*idx[jrow]+6];
1189       sum8 += v[jrow]*x[9*idx[jrow]+7];
1190       sum9 += v[jrow]*x[9*idx[jrow]+8];
1191       jrow++;
1192      }
1193     y[9*i]   = sum1;
1194     y[9*i+1] = sum2;
1195     y[9*i+2] = sum3;
1196     y[9*i+3] = sum4;
1197     y[9*i+4] = sum5;
1198     y[9*i+5] = sum6;
1199     y[9*i+6] = sum7;
1200     y[9*i+7] = sum8;
1201     y[9*i+8] = sum9;
1202   }
1203 
1204   PetscLogFlops(18*a->nz - 9*m);
1205   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1206   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1212 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1213 {
1214   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1215   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1216   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1217   PetscErrorCode ierr;
1218   PetscInt       m = b->AIJ->m,n,i,*idx;
1219 
1220   PetscFunctionBegin;
1221   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1222   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1223   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1224 
1225   for (i=0; i<m; i++) {
1226     idx    = a->j + a->i[i] ;
1227     v      = a->a + a->i[i] ;
1228     n      = a->i[i+1] - a->i[i];
1229     alpha1 = x[9*i];
1230     alpha2 = x[9*i+1];
1231     alpha3 = x[9*i+2];
1232     alpha4 = x[9*i+3];
1233     alpha5 = x[9*i+4];
1234     alpha6 = x[9*i+5];
1235     alpha7 = x[9*i+6];
1236     alpha8 = x[9*i+7];
1237     alpha9 = x[9*i+8];
1238     while (n-->0) {
1239       y[9*(*idx)]   += alpha1*(*v);
1240       y[9*(*idx)+1] += alpha2*(*v);
1241       y[9*(*idx)+2] += alpha3*(*v);
1242       y[9*(*idx)+3] += alpha4*(*v);
1243       y[9*(*idx)+4] += alpha5*(*v);
1244       y[9*(*idx)+5] += alpha6*(*v);
1245       y[9*(*idx)+6] += alpha7*(*v);
1246       y[9*(*idx)+7] += alpha8*(*v);
1247       y[9*(*idx)+8] += alpha9*(*v);
1248       idx++; v++;
1249     }
1250   }
1251   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1252   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1253   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1259 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1260 {
1261   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1262   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1263   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1264   PetscErrorCode ierr;
1265   PetscInt       m = b->AIJ->m,*idx,*ii;
1266   PetscInt       n,i,jrow,j;
1267 
1268   PetscFunctionBegin;
1269   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1270   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1271   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1272   idx  = a->j;
1273   v    = a->a;
1274   ii   = a->i;
1275 
1276   for (i=0; i<m; i++) {
1277     jrow = ii[i];
1278     n    = ii[i+1] - jrow;
1279     sum1  = 0.0;
1280     sum2  = 0.0;
1281     sum3  = 0.0;
1282     sum4  = 0.0;
1283     sum5  = 0.0;
1284     sum6  = 0.0;
1285     sum7  = 0.0;
1286     sum8  = 0.0;
1287     sum9  = 0.0;
1288     for (j=0; j<n; j++) {
1289       sum1 += v[jrow]*x[9*idx[jrow]];
1290       sum2 += v[jrow]*x[9*idx[jrow]+1];
1291       sum3 += v[jrow]*x[9*idx[jrow]+2];
1292       sum4 += v[jrow]*x[9*idx[jrow]+3];
1293       sum5 += v[jrow]*x[9*idx[jrow]+4];
1294       sum6 += v[jrow]*x[9*idx[jrow]+5];
1295       sum7 += v[jrow]*x[9*idx[jrow]+6];
1296       sum8 += v[jrow]*x[9*idx[jrow]+7];
1297       sum9 += v[jrow]*x[9*idx[jrow]+8];
1298       jrow++;
1299      }
1300     y[9*i]   += sum1;
1301     y[9*i+1] += sum2;
1302     y[9*i+2] += sum3;
1303     y[9*i+3] += sum4;
1304     y[9*i+4] += sum5;
1305     y[9*i+5] += sum6;
1306     y[9*i+6] += sum7;
1307     y[9*i+7] += sum8;
1308     y[9*i+8] += sum9;
1309   }
1310 
1311   PetscLogFlops(18*a->nz);
1312   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1313   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1319 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1320 {
1321   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1322   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1323   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1324   PetscErrorCode ierr;
1325   PetscInt       m = b->AIJ->m,n,i,*idx;
1326 
1327   PetscFunctionBegin;
1328   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1329   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1330   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1331   for (i=0; i<m; i++) {
1332     idx    = a->j + a->i[i] ;
1333     v      = a->a + a->i[i] ;
1334     n      = a->i[i+1] - a->i[i];
1335     alpha1 = x[9*i];
1336     alpha2 = x[9*i+1];
1337     alpha3 = x[9*i+2];
1338     alpha4 = x[9*i+3];
1339     alpha5 = x[9*i+4];
1340     alpha6 = x[9*i+5];
1341     alpha7 = x[9*i+6];
1342     alpha8 = x[9*i+7];
1343     alpha9 = x[9*i+8];
1344     while (n-->0) {
1345       y[9*(*idx)]   += alpha1*(*v);
1346       y[9*(*idx)+1] += alpha2*(*v);
1347       y[9*(*idx)+2] += alpha3*(*v);
1348       y[9*(*idx)+3] += alpha4*(*v);
1349       y[9*(*idx)+4] += alpha5*(*v);
1350       y[9*(*idx)+5] += alpha6*(*v);
1351       y[9*(*idx)+6] += alpha7*(*v);
1352       y[9*(*idx)+7] += alpha8*(*v);
1353       y[9*(*idx)+8] += alpha9*(*v);
1354       idx++; v++;
1355     }
1356   }
1357   PetscLogFlops(18*a->nz);
1358   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1359   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 /*--------------------------------------------------------------------------------------------*/
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1366 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1367 {
1368   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1369   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1370   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1371   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1372   PetscErrorCode ierr;
1373   PetscInt       m = b->AIJ->m,*idx,*ii;
1374   PetscInt       n,i,jrow,j;
1375 
1376   PetscFunctionBegin;
1377   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1378   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1379   idx  = a->j;
1380   v    = a->a;
1381   ii   = a->i;
1382 
1383   for (i=0; i<m; i++) {
1384     jrow = ii[i];
1385     n    = ii[i+1] - jrow;
1386     sum1  = 0.0;
1387     sum2  = 0.0;
1388     sum3  = 0.0;
1389     sum4  = 0.0;
1390     sum5  = 0.0;
1391     sum6  = 0.0;
1392     sum7  = 0.0;
1393     sum8  = 0.0;
1394     sum9  = 0.0;
1395     sum10 = 0.0;
1396     sum11 = 0.0;
1397     sum12 = 0.0;
1398     sum13 = 0.0;
1399     sum14 = 0.0;
1400     sum15 = 0.0;
1401     sum16 = 0.0;
1402     for (j=0; j<n; j++) {
1403       sum1  += v[jrow]*x[16*idx[jrow]];
1404       sum2  += v[jrow]*x[16*idx[jrow]+1];
1405       sum3  += v[jrow]*x[16*idx[jrow]+2];
1406       sum4  += v[jrow]*x[16*idx[jrow]+3];
1407       sum5  += v[jrow]*x[16*idx[jrow]+4];
1408       sum6  += v[jrow]*x[16*idx[jrow]+5];
1409       sum7  += v[jrow]*x[16*idx[jrow]+6];
1410       sum8  += v[jrow]*x[16*idx[jrow]+7];
1411       sum9  += v[jrow]*x[16*idx[jrow]+8];
1412       sum10 += v[jrow]*x[16*idx[jrow]+9];
1413       sum11 += v[jrow]*x[16*idx[jrow]+10];
1414       sum12 += v[jrow]*x[16*idx[jrow]+11];
1415       sum13 += v[jrow]*x[16*idx[jrow]+12];
1416       sum14 += v[jrow]*x[16*idx[jrow]+13];
1417       sum15 += v[jrow]*x[16*idx[jrow]+14];
1418       sum16 += v[jrow]*x[16*idx[jrow]+15];
1419       jrow++;
1420      }
1421     y[16*i]    = sum1;
1422     y[16*i+1]  = sum2;
1423     y[16*i+2]  = sum3;
1424     y[16*i+3]  = sum4;
1425     y[16*i+4]  = sum5;
1426     y[16*i+5]  = sum6;
1427     y[16*i+6]  = sum7;
1428     y[16*i+7]  = sum8;
1429     y[16*i+8]  = sum9;
1430     y[16*i+9]  = sum10;
1431     y[16*i+10] = sum11;
1432     y[16*i+11] = sum12;
1433     y[16*i+12] = sum13;
1434     y[16*i+13] = sum14;
1435     y[16*i+14] = sum15;
1436     y[16*i+15] = sum16;
1437   }
1438 
1439   PetscLogFlops(32*a->nz - 16*m);
1440   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1441   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1447 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1448 {
1449   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1450   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1451   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1452   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1453   PetscErrorCode ierr;
1454   PetscInt       m = b->AIJ->m,n,i,*idx;
1455 
1456   PetscFunctionBegin;
1457   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1459   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1460 
1461   for (i=0; i<m; i++) {
1462     idx    = a->j + a->i[i] ;
1463     v      = a->a + a->i[i] ;
1464     n      = a->i[i+1] - a->i[i];
1465     alpha1  = x[16*i];
1466     alpha2  = x[16*i+1];
1467     alpha3  = x[16*i+2];
1468     alpha4  = x[16*i+3];
1469     alpha5  = x[16*i+4];
1470     alpha6  = x[16*i+5];
1471     alpha7  = x[16*i+6];
1472     alpha8  = x[16*i+7];
1473     alpha9  = x[16*i+8];
1474     alpha10 = x[16*i+9];
1475     alpha11 = x[16*i+10];
1476     alpha12 = x[16*i+11];
1477     alpha13 = x[16*i+12];
1478     alpha14 = x[16*i+13];
1479     alpha15 = x[16*i+14];
1480     alpha16 = x[16*i+15];
1481     while (n-->0) {
1482       y[16*(*idx)]    += alpha1*(*v);
1483       y[16*(*idx)+1]  += alpha2*(*v);
1484       y[16*(*idx)+2]  += alpha3*(*v);
1485       y[16*(*idx)+3]  += alpha4*(*v);
1486       y[16*(*idx)+4]  += alpha5*(*v);
1487       y[16*(*idx)+5]  += alpha6*(*v);
1488       y[16*(*idx)+6]  += alpha7*(*v);
1489       y[16*(*idx)+7]  += alpha8*(*v);
1490       y[16*(*idx)+8]  += alpha9*(*v);
1491       y[16*(*idx)+9]  += alpha10*(*v);
1492       y[16*(*idx)+10] += alpha11*(*v);
1493       y[16*(*idx)+11] += alpha12*(*v);
1494       y[16*(*idx)+12] += alpha13*(*v);
1495       y[16*(*idx)+13] += alpha14*(*v);
1496       y[16*(*idx)+14] += alpha15*(*v);
1497       y[16*(*idx)+15] += alpha16*(*v);
1498       idx++; v++;
1499     }
1500   }
1501   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1502   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1503   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1509 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1510 {
1511   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1512   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1513   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1514   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1515   PetscErrorCode ierr;
1516   PetscInt       m = b->AIJ->m,*idx,*ii;
1517   PetscInt       n,i,jrow,j;
1518 
1519   PetscFunctionBegin;
1520   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1521   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1522   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1523   idx  = a->j;
1524   v    = a->a;
1525   ii   = a->i;
1526 
1527   for (i=0; i<m; i++) {
1528     jrow = ii[i];
1529     n    = ii[i+1] - jrow;
1530     sum1  = 0.0;
1531     sum2  = 0.0;
1532     sum3  = 0.0;
1533     sum4  = 0.0;
1534     sum5  = 0.0;
1535     sum6  = 0.0;
1536     sum7  = 0.0;
1537     sum8  = 0.0;
1538     sum9  = 0.0;
1539     sum10 = 0.0;
1540     sum11 = 0.0;
1541     sum12 = 0.0;
1542     sum13 = 0.0;
1543     sum14 = 0.0;
1544     sum15 = 0.0;
1545     sum16 = 0.0;
1546     for (j=0; j<n; j++) {
1547       sum1  += v[jrow]*x[16*idx[jrow]];
1548       sum2  += v[jrow]*x[16*idx[jrow]+1];
1549       sum3  += v[jrow]*x[16*idx[jrow]+2];
1550       sum4  += v[jrow]*x[16*idx[jrow]+3];
1551       sum5  += v[jrow]*x[16*idx[jrow]+4];
1552       sum6  += v[jrow]*x[16*idx[jrow]+5];
1553       sum7  += v[jrow]*x[16*idx[jrow]+6];
1554       sum8  += v[jrow]*x[16*idx[jrow]+7];
1555       sum9  += v[jrow]*x[16*idx[jrow]+8];
1556       sum10 += v[jrow]*x[16*idx[jrow]+9];
1557       sum11 += v[jrow]*x[16*idx[jrow]+10];
1558       sum12 += v[jrow]*x[16*idx[jrow]+11];
1559       sum13 += v[jrow]*x[16*idx[jrow]+12];
1560       sum14 += v[jrow]*x[16*idx[jrow]+13];
1561       sum15 += v[jrow]*x[16*idx[jrow]+14];
1562       sum16 += v[jrow]*x[16*idx[jrow]+15];
1563       jrow++;
1564      }
1565     y[16*i]    += sum1;
1566     y[16*i+1]  += sum2;
1567     y[16*i+2]  += sum3;
1568     y[16*i+3]  += sum4;
1569     y[16*i+4]  += sum5;
1570     y[16*i+5]  += sum6;
1571     y[16*i+6]  += sum7;
1572     y[16*i+7]  += sum8;
1573     y[16*i+8]  += sum9;
1574     y[16*i+9]  += sum10;
1575     y[16*i+10] += sum11;
1576     y[16*i+11] += sum12;
1577     y[16*i+12] += sum13;
1578     y[16*i+13] += sum14;
1579     y[16*i+14] += sum15;
1580     y[16*i+15] += sum16;
1581   }
1582 
1583   PetscLogFlops(32*a->nz);
1584   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1585   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1591 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1592 {
1593   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1594   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1595   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1596   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1597   PetscErrorCode ierr;
1598   PetscInt       m = b->AIJ->m,n,i,*idx;
1599 
1600   PetscFunctionBegin;
1601   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1602   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1603   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1604   for (i=0; i<m; i++) {
1605     idx    = a->j + a->i[i] ;
1606     v      = a->a + a->i[i] ;
1607     n      = a->i[i+1] - a->i[i];
1608     alpha1 = x[16*i];
1609     alpha2 = x[16*i+1];
1610     alpha3 = x[16*i+2];
1611     alpha4 = x[16*i+3];
1612     alpha5 = x[16*i+4];
1613     alpha6 = x[16*i+5];
1614     alpha7 = x[16*i+6];
1615     alpha8 = x[16*i+7];
1616     alpha9  = x[16*i+8];
1617     alpha10 = x[16*i+9];
1618     alpha11 = x[16*i+10];
1619     alpha12 = x[16*i+11];
1620     alpha13 = x[16*i+12];
1621     alpha14 = x[16*i+13];
1622     alpha15 = x[16*i+14];
1623     alpha16 = x[16*i+15];
1624     while (n-->0) {
1625       y[16*(*idx)]   += alpha1*(*v);
1626       y[16*(*idx)+1] += alpha2*(*v);
1627       y[16*(*idx)+2] += alpha3*(*v);
1628       y[16*(*idx)+3] += alpha4*(*v);
1629       y[16*(*idx)+4] += alpha5*(*v);
1630       y[16*(*idx)+5] += alpha6*(*v);
1631       y[16*(*idx)+6] += alpha7*(*v);
1632       y[16*(*idx)+7] += alpha8*(*v);
1633       y[16*(*idx)+8]  += alpha9*(*v);
1634       y[16*(*idx)+9]  += alpha10*(*v);
1635       y[16*(*idx)+10] += alpha11*(*v);
1636       y[16*(*idx)+11] += alpha12*(*v);
1637       y[16*(*idx)+12] += alpha13*(*v);
1638       y[16*(*idx)+13] += alpha14*(*v);
1639       y[16*(*idx)+14] += alpha15*(*v);
1640       y[16*(*idx)+15] += alpha16*(*v);
1641       idx++; v++;
1642     }
1643   }
1644   PetscLogFlops(32*a->nz);
1645   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1646   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 /*===================================================================================*/
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1653 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1654 {
1655   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1656   PetscErrorCode ierr;
1657 
1658   PetscFunctionBegin;
1659   /* start the scatter */
1660   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1661   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1662   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1663   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1669 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1670 {
1671   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1676   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1677   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1678   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1684 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1685 {
1686   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   /* start the scatter */
1691   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1692   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1693   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1694   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1700 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1701 {
1702   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1703   PetscErrorCode ierr;
1704 
1705   PetscFunctionBegin;
1706   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1707   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1708   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1709   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #include "src/mat/impls/aij/seq/aij.h"
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatConvert_MAIJ_SeqAIJ"
1716 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,Mat *B)
1717 {
1718   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1719   Mat               a = b->AIJ;
1720   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
1721   PetscErrorCode    ierr;
1722   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii;
1723   const PetscInt    *cols;
1724   const PetscScalar *vals;
1725 
1726   PetscFunctionBegin;
1727   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
1728   ierr = PetscMalloc(4*m*sizeof(int),&ilen);CHKERRQ(ierr);
1729   for (i=0; i<m; i++) {
1730     nmax = PetscMax(nmax,aij->ilen[i]);
1731     for (j=0; j<4; j++) {
1732       ilen[4*i+j] = aij->ilen[i];
1733     }
1734   }
1735   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4*m,4*n,0,ilen,B);CHKERRQ(ierr);
1736   ierr = MatSetOption(*B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1737   ierr = PetscFree(ilen);CHKERRQ(ierr);
1738   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
1739   ii   = 0;
1740   for (i=0; i<m; i++) {
1741     ierr = MatGetRow(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1742     for (j=0; j<4; j++) {
1743       for (k=0; k<ncols; k++) {
1744         icols[k] = 4*cols[k]+j;
1745       }
1746       ierr = MatSetValues_SeqAIJ(*B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
1747       ii++;
1748     }
1749     ierr = MatRestoreRow(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1750   }
1751   ierr = PetscFree(icols);CHKERRQ(ierr);
1752   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1753   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 /* ---------------------------------------------------------------------------------- */
1758 /*MC
1759   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1760   operations for multicomponent problems.  It interpolates each component the same
1761   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
1762   and MATMPIAIJ for distributed matrices.
1763 
1764   Operations provided:
1765 . MatMult
1766 . MatMultTranspose
1767 . MatMultAdd
1768 . MatMultTransposeAdd
1769 
1770   Level: advanced
1771 
1772 M*/
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatCreateMAIJ"
1775 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
1776 {
1777   PetscErrorCode ierr;
1778   PetscMPIInt    size;
1779   PetscInt       n;
1780   Mat_MPIMAIJ    *b;
1781   Mat            B;
1782 
1783   PetscFunctionBegin;
1784   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1785 
1786   if (dof == 1) {
1787     *maij = A;
1788   } else {
1789     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1790     B->assembled    = PETSC_TRUE;
1791 
1792     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1793     if (size == 1) {
1794       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
1795       B->ops->destroy = MatDestroy_SeqMAIJ;
1796       b      = (Mat_MPIMAIJ*)B->data;
1797       b->dof = dof;
1798       b->AIJ = A;
1799       if (dof == 2) {
1800         B->ops->mult             = MatMult_SeqMAIJ_2;
1801         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1802         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1803         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1804       } else if (dof == 3) {
1805         B->ops->mult             = MatMult_SeqMAIJ_3;
1806         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1807         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1808         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1809       } else if (dof == 4) {
1810         B->ops->mult             = MatMult_SeqMAIJ_4;
1811         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1812         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1813         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1814       } else if (dof == 5) {
1815         B->ops->mult             = MatMult_SeqMAIJ_5;
1816         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1817         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1818         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1819       } else if (dof == 6) {
1820         B->ops->mult             = MatMult_SeqMAIJ_6;
1821         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1822         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1823         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1824       } else if (dof == 8) {
1825         B->ops->mult             = MatMult_SeqMAIJ_8;
1826         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1827         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1828         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1829       } else if (dof == 9) {
1830         B->ops->mult             = MatMult_SeqMAIJ_9;
1831         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1832         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1833         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1834       } else if (dof == 16) {
1835         B->ops->mult             = MatMult_SeqMAIJ_16;
1836         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1837         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1838         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1839       } else {
1840         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1841       }
1842       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
1843     } else {
1844       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1845       IS         from,to;
1846       Vec        gvec;
1847       PetscInt        *garray,i;
1848 
1849       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1850       B->ops->destroy = MatDestroy_MPIMAIJ;
1851       b      = (Mat_MPIMAIJ*)B->data;
1852       b->dof = dof;
1853       b->A   = A;
1854       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
1855       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
1856 
1857       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1858       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1859 
1860       /* create two temporary Index sets for build scatter gather */
1861       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
1862       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1863       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1864       ierr = PetscFree(garray);CHKERRQ(ierr);
1865       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1866 
1867       /* create temporary global vector to generate scatter context */
1868       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1869 
1870       /* generate the scatter context */
1871       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1872 
1873       ierr = ISDestroy(from);CHKERRQ(ierr);
1874       ierr = ISDestroy(to);CHKERRQ(ierr);
1875       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1876 
1877       B->ops->mult             = MatMult_MPIMAIJ_dof;
1878       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1879       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1880       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1881     }
1882     *maij = B;
1883   }
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 
1888 
1889 
1890 
1891 
1892 
1893 
1894 
1895 
1896 
1897 
1898