xref: /petsc/src/mat/impls/maij/maij.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "src/mat/impls/maij/maij.h"
21 #include "src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27 {
28   PetscErrorCode ierr;
29   PetscTruth     ismpimaij,isseqmaij;
30 
31   PetscFunctionBegin;
32   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34   if (ismpimaij) {
35     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36 
37     *B = b->A;
38   } else if (isseqmaij) {
39     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40 
41     *B = b->AIJ;
42   } else {
43     *B = A;
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMAIJRedimension"
50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51 {
52   PetscErrorCode ierr;
53   Mat            Aij;
54 
55   PetscFunctionBegin;
56   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqMAIJ"
63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
67 
68   PetscFunctionBegin;
69   if (b->AIJ) {
70     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
71   }
72   ierr = PetscFree(b);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatView_SeqMAIJ"
78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79 {
80   PetscErrorCode ierr;
81   Mat            B;
82 
83   PetscFunctionBegin;
84   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85   ierr = MatView(B,viewer);CHKERRQ(ierr);
86   ierr = MatDestroy(B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatView_MPIMAIJ"
92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93 {
94   PetscErrorCode ierr;
95   Mat            B;
96 
97   PetscFunctionBegin;
98   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99   ierr = MatView(B,viewer);CHKERRQ(ierr);
100   ierr = MatDestroy(B);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatDestroy_MPIMAIJ"
106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107 {
108   PetscErrorCode ierr;
109   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
110 
111   PetscFunctionBegin;
112   if (b->AIJ) {
113     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
114   }
115   if (b->OAIJ) {
116     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117   }
118   if (b->A) {
119     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120   }
121   if (b->ctx) {
122     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123   }
124   if (b->w) {
125     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126   }
127   ierr = PetscFree(b);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /*MC
132   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
133   multicomponent problems, interpolating or restricting each component the same way independently.
134   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
135 
136   Operations provided:
137 . MatMult
138 . MatMultTranspose
139 . MatMultAdd
140 . MatMultTransposeAdd
141 
142   Level: advanced
143 
144 .seealso: MatCreateSeqDense
145 M*/
146 
147 EXTERN_C_BEGIN
148 #undef __FUNCT__
149 #define __FUNCT__ "MatCreate_MAIJ"
150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
151 {
152   PetscErrorCode ierr;
153   Mat_MPIMAIJ    *b;
154 
155   PetscFunctionBegin;
156   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
157   A->data  = (void*)b;
158   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
159   A->factor           = 0;
160   A->mapping          = 0;
161 
162   b->AIJ  = 0;
163   b->dof  = 0;
164   b->OAIJ = 0;
165   b->ctx  = 0;
166   b->w    = 0;
167   PetscFunctionReturn(0);
168 }
169 EXTERN_C_END
170 
171 /* --------------------------------------------------------------------------------------*/
172 #undef __FUNCT__
173 #define __FUNCT__ "MatMult_SeqMAIJ_2"
174 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
175 {
176   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
177   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
178   PetscScalar    *x,*y,*v,sum1, sum2;
179   PetscErrorCode ierr;
180   PetscInt       m = b->AIJ->m,*idx,*ii;
181   PetscInt       n,i,jrow,j;
182 
183   PetscFunctionBegin;
184   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
185   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
186   idx  = a->j;
187   v    = a->a;
188   ii   = a->i;
189 
190   for (i=0; i<m; i++) {
191     jrow = ii[i];
192     n    = ii[i+1] - jrow;
193     sum1  = 0.0;
194     sum2  = 0.0;
195     for (j=0; j<n; j++) {
196       sum1 += v[jrow]*x[2*idx[jrow]];
197       sum2 += v[jrow]*x[2*idx[jrow]+1];
198       jrow++;
199      }
200     y[2*i]   = sum1;
201     y[2*i+1] = sum2;
202   }
203 
204   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
205   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
206   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
207   PetscFunctionReturn(0);
208 }
209 
210 #undef __FUNCT__
211 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
212 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
213 {
214   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
215   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
216   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
217   PetscErrorCode ierr;
218   PetscInt       m = b->AIJ->m,n,i,*idx;
219 
220   PetscFunctionBegin;
221   ierr = VecSet(yy,zero);CHKERRQ(ierr);
222   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
223   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
224 
225   for (i=0; i<m; i++) {
226     idx    = a->j + a->i[i] ;
227     v      = a->a + a->i[i] ;
228     n      = a->i[i+1] - a->i[i];
229     alpha1 = x[2*i];
230     alpha2 = x[2*i+1];
231     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
232   }
233   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr);
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
241 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
242 {
243   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
244   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
245   PetscScalar    *x,*y,*v,sum1, sum2;
246   PetscErrorCode ierr;
247   PetscInt       m = b->AIJ->m,*idx,*ii;
248   PetscInt       n,i,jrow,j;
249 
250   PetscFunctionBegin;
251   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
252   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
253   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
254   idx  = a->j;
255   v    = a->a;
256   ii   = a->i;
257 
258   for (i=0; i<m; i++) {
259     jrow = ii[i];
260     n    = ii[i+1] - jrow;
261     sum1  = 0.0;
262     sum2  = 0.0;
263     for (j=0; j<n; j++) {
264       sum1 += v[jrow]*x[2*idx[jrow]];
265       sum2 += v[jrow]*x[2*idx[jrow]+1];
266       jrow++;
267      }
268     y[2*i]   += sum1;
269     y[2*i+1] += sum2;
270   }
271 
272   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
273   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
274   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 #undef __FUNCT__
278 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
279 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
280 {
281   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
282   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
283   PetscScalar    *x,*y,*v,alpha1,alpha2;
284   PetscErrorCode ierr;
285   PetscInt       m = b->AIJ->m,n,i,*idx;
286 
287   PetscFunctionBegin;
288   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
289   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
290   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
291 
292   for (i=0; i<m; i++) {
293     idx   = a->j + a->i[i] ;
294     v     = a->a + a->i[i] ;
295     n     = a->i[i+1] - a->i[i];
296     alpha1 = x[2*i];
297     alpha2 = x[2*i+1];
298     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
299   }
300   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr);
301   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
302   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 /* --------------------------------------------------------------------------------------*/
306 #undef __FUNCT__
307 #define __FUNCT__ "MatMult_SeqMAIJ_3"
308 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309 {
310   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
311   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
312   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
313   PetscErrorCode ierr;
314   PetscInt       m = b->AIJ->m,*idx,*ii;
315   PetscInt       n,i,jrow,j;
316 
317   PetscFunctionBegin;
318   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
319   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
320   idx  = a->j;
321   v    = a->a;
322   ii   = a->i;
323 
324   for (i=0; i<m; i++) {
325     jrow = ii[i];
326     n    = ii[i+1] - jrow;
327     sum1  = 0.0;
328     sum2  = 0.0;
329     sum3  = 0.0;
330     for (j=0; j<n; j++) {
331       sum1 += v[jrow]*x[3*idx[jrow]];
332       sum2 += v[jrow]*x[3*idx[jrow]+1];
333       sum3 += v[jrow]*x[3*idx[jrow]+2];
334       jrow++;
335      }
336     y[3*i]   = sum1;
337     y[3*i+1] = sum2;
338     y[3*i+2] = sum3;
339   }
340 
341   ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr);
342   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
343   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
349 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
350 {
351   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
352   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
353   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
354   PetscErrorCode ierr;
355   PetscInt       m = b->AIJ->m,n,i,*idx;
356 
357   PetscFunctionBegin;
358   ierr = VecSet(yy,zero);CHKERRQ(ierr);
359   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
360   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
361 
362   for (i=0; i<m; i++) {
363     idx    = a->j + a->i[i];
364     v      = a->a + a->i[i];
365     n      = a->i[i+1] - a->i[i];
366     alpha1 = x[3*i];
367     alpha2 = x[3*i+1];
368     alpha3 = x[3*i+2];
369     while (n-->0) {
370       y[3*(*idx)]   += alpha1*(*v);
371       y[3*(*idx)+1] += alpha2*(*v);
372       y[3*(*idx)+2] += alpha3*(*v);
373       idx++; v++;
374     }
375   }
376   ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->n);CHKERRQ(ierr);
377   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
378   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
384 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385 {
386   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
387   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
388   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
389   PetscErrorCode ierr;
390   PetscInt       m = b->AIJ->m,*idx,*ii;
391   PetscInt       n,i,jrow,j;
392 
393   PetscFunctionBegin;
394   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
395   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
396   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
397   idx  = a->j;
398   v    = a->a;
399   ii   = a->i;
400 
401   for (i=0; i<m; i++) {
402     jrow = ii[i];
403     n    = ii[i+1] - jrow;
404     sum1  = 0.0;
405     sum2  = 0.0;
406     sum3  = 0.0;
407     for (j=0; j<n; j++) {
408       sum1 += v[jrow]*x[3*idx[jrow]];
409       sum2 += v[jrow]*x[3*idx[jrow]+1];
410       sum3 += v[jrow]*x[3*idx[jrow]+2];
411       jrow++;
412      }
413     y[3*i]   += sum1;
414     y[3*i+1] += sum2;
415     y[3*i+2] += sum3;
416   }
417 
418   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
419   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
420   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 #undef __FUNCT__
424 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
425 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
426 {
427   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
428   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
429   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
430   PetscErrorCode ierr;
431   PetscInt       m = b->AIJ->m,n,i,*idx;
432 
433   PetscFunctionBegin;
434   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
435   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
436   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
437   for (i=0; i<m; i++) {
438     idx    = a->j + a->i[i] ;
439     v      = a->a + a->i[i] ;
440     n      = a->i[i+1] - a->i[i];
441     alpha1 = x[3*i];
442     alpha2 = x[3*i+1];
443     alpha3 = x[3*i+2];
444     while (n-->0) {
445       y[3*(*idx)]   += alpha1*(*v);
446       y[3*(*idx)+1] += alpha2*(*v);
447       y[3*(*idx)+2] += alpha3*(*v);
448       idx++; v++;
449     }
450   }
451   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
452   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
453   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 /* ------------------------------------------------------------------------------*/
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMult_SeqMAIJ_4"
460 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
461 {
462   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
464   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
465   PetscErrorCode ierr;
466   PetscInt       m = b->AIJ->m,*idx,*ii;
467   PetscInt       n,i,jrow,j;
468 
469   PetscFunctionBegin;
470   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
472   idx  = a->j;
473   v    = a->a;
474   ii   = a->i;
475 
476   for (i=0; i<m; i++) {
477     jrow = ii[i];
478     n    = ii[i+1] - jrow;
479     sum1  = 0.0;
480     sum2  = 0.0;
481     sum3  = 0.0;
482     sum4  = 0.0;
483     for (j=0; j<n; j++) {
484       sum1 += v[jrow]*x[4*idx[jrow]];
485       sum2 += v[jrow]*x[4*idx[jrow]+1];
486       sum3 += v[jrow]*x[4*idx[jrow]+2];
487       sum4 += v[jrow]*x[4*idx[jrow]+3];
488       jrow++;
489      }
490     y[4*i]   = sum1;
491     y[4*i+1] = sum2;
492     y[4*i+2] = sum3;
493     y[4*i+3] = sum4;
494   }
495 
496   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
497   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
498   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
504 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
505 {
506   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
507   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
508   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
509   PetscErrorCode ierr;
510   PetscInt       m = b->AIJ->m,n,i,*idx;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(yy,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
515   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
516   for (i=0; i<m; i++) {
517     idx    = a->j + a->i[i] ;
518     v      = a->a + a->i[i] ;
519     n      = a->i[i+1] - a->i[i];
520     alpha1 = x[4*i];
521     alpha2 = x[4*i+1];
522     alpha3 = x[4*i+2];
523     alpha4 = x[4*i+3];
524     while (n-->0) {
525       y[4*(*idx)]   += alpha1*(*v);
526       y[4*(*idx)+1] += alpha2*(*v);
527       y[4*(*idx)+2] += alpha3*(*v);
528       y[4*(*idx)+3] += alpha4*(*v);
529       idx++; v++;
530     }
531   }
532   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr);
533   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
534   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
535   PetscFunctionReturn(0);
536 }
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
540 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
541 {
542   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
543   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
544   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
545   PetscErrorCode ierr;
546   PetscInt       m = b->AIJ->m,*idx,*ii;
547   PetscInt       n,i,jrow,j;
548 
549   PetscFunctionBegin;
550   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
551   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
552   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
553   idx  = a->j;
554   v    = a->a;
555   ii   = a->i;
556 
557   for (i=0; i<m; i++) {
558     jrow = ii[i];
559     n    = ii[i+1] - jrow;
560     sum1  = 0.0;
561     sum2  = 0.0;
562     sum3  = 0.0;
563     sum4  = 0.0;
564     for (j=0; j<n; j++) {
565       sum1 += v[jrow]*x[4*idx[jrow]];
566       sum2 += v[jrow]*x[4*idx[jrow]+1];
567       sum3 += v[jrow]*x[4*idx[jrow]+2];
568       sum4 += v[jrow]*x[4*idx[jrow]+3];
569       jrow++;
570      }
571     y[4*i]   += sum1;
572     y[4*i+1] += sum2;
573     y[4*i+2] += sum3;
574     y[4*i+3] += sum4;
575   }
576 
577   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
578   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
579   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 #undef __FUNCT__
583 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
584 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
585 {
586   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
587   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
588   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
589   PetscErrorCode ierr;
590   PetscInt       m = b->AIJ->m,n,i,*idx;
591 
592   PetscFunctionBegin;
593   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
594   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
595   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
596 
597   for (i=0; i<m; i++) {
598     idx    = a->j + a->i[i] ;
599     v      = a->a + a->i[i] ;
600     n      = a->i[i+1] - a->i[i];
601     alpha1 = x[4*i];
602     alpha2 = x[4*i+1];
603     alpha3 = x[4*i+2];
604     alpha4 = x[4*i+3];
605     while (n-->0) {
606       y[4*(*idx)]   += alpha1*(*v);
607       y[4*(*idx)+1] += alpha2*(*v);
608       y[4*(*idx)+2] += alpha3*(*v);
609       y[4*(*idx)+3] += alpha4*(*v);
610       idx++; v++;
611     }
612   }
613   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr);
614   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
615   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 /* ------------------------------------------------------------------------------*/
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatMult_SeqMAIJ_5"
622 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
623 {
624   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
625   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
626   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
627   PetscErrorCode ierr;
628   PetscInt       m = b->AIJ->m,*idx,*ii;
629   PetscInt       n,i,jrow,j;
630 
631   PetscFunctionBegin;
632   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
633   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
634   idx  = a->j;
635   v    = a->a;
636   ii   = a->i;
637 
638   for (i=0; i<m; i++) {
639     jrow = ii[i];
640     n    = ii[i+1] - jrow;
641     sum1  = 0.0;
642     sum2  = 0.0;
643     sum3  = 0.0;
644     sum4  = 0.0;
645     sum5  = 0.0;
646     for (j=0; j<n; j++) {
647       sum1 += v[jrow]*x[5*idx[jrow]];
648       sum2 += v[jrow]*x[5*idx[jrow]+1];
649       sum3 += v[jrow]*x[5*idx[jrow]+2];
650       sum4 += v[jrow]*x[5*idx[jrow]+3];
651       sum5 += v[jrow]*x[5*idx[jrow]+4];
652       jrow++;
653      }
654     y[5*i]   = sum1;
655     y[5*i+1] = sum2;
656     y[5*i+2] = sum3;
657     y[5*i+3] = sum4;
658     y[5*i+4] = sum5;
659   }
660 
661   ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr);
662   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
663   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
669 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
670 {
671   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
672   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
673   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
674   PetscErrorCode ierr;
675   PetscInt       m = b->AIJ->m,n,i,*idx;
676 
677   PetscFunctionBegin;
678   ierr = VecSet(yy,zero);CHKERRQ(ierr);
679   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
680   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
681 
682   for (i=0; i<m; i++) {
683     idx    = a->j + a->i[i] ;
684     v      = a->a + a->i[i] ;
685     n      = a->i[i+1] - a->i[i];
686     alpha1 = x[5*i];
687     alpha2 = x[5*i+1];
688     alpha3 = x[5*i+2];
689     alpha4 = x[5*i+3];
690     alpha5 = x[5*i+4];
691     while (n-->0) {
692       y[5*(*idx)]   += alpha1*(*v);
693       y[5*(*idx)+1] += alpha2*(*v);
694       y[5*(*idx)+2] += alpha3*(*v);
695       y[5*(*idx)+3] += alpha4*(*v);
696       y[5*(*idx)+4] += alpha5*(*v);
697       idx++; v++;
698     }
699   }
700   ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->n);CHKERRQ(ierr);
701   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNCT__
707 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
708 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
709 {
710   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
711   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
712   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
713   PetscErrorCode ierr;
714   PetscInt       m = b->AIJ->m,*idx,*ii;
715   PetscInt       n,i,jrow,j;
716 
717   PetscFunctionBegin;
718   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
719   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
720   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
721   idx  = a->j;
722   v    = a->a;
723   ii   = a->i;
724 
725   for (i=0; i<m; i++) {
726     jrow = ii[i];
727     n    = ii[i+1] - jrow;
728     sum1  = 0.0;
729     sum2  = 0.0;
730     sum3  = 0.0;
731     sum4  = 0.0;
732     sum5  = 0.0;
733     for (j=0; j<n; j++) {
734       sum1 += v[jrow]*x[5*idx[jrow]];
735       sum2 += v[jrow]*x[5*idx[jrow]+1];
736       sum3 += v[jrow]*x[5*idx[jrow]+2];
737       sum4 += v[jrow]*x[5*idx[jrow]+3];
738       sum5 += v[jrow]*x[5*idx[jrow]+4];
739       jrow++;
740      }
741     y[5*i]   += sum1;
742     y[5*i+1] += sum2;
743     y[5*i+2] += sum3;
744     y[5*i+3] += sum4;
745     y[5*i+4] += sum5;
746   }
747 
748   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
749   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
750   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
751   PetscFunctionReturn(0);
752 }
753 
754 #undef __FUNCT__
755 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
756 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
757 {
758   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
759   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
760   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
761   PetscErrorCode ierr;
762   PetscInt       m = b->AIJ->m,n,i,*idx;
763 
764   PetscFunctionBegin;
765   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
766   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
767   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
768 
769   for (i=0; i<m; i++) {
770     idx    = a->j + a->i[i] ;
771     v      = a->a + a->i[i] ;
772     n      = a->i[i+1] - a->i[i];
773     alpha1 = x[5*i];
774     alpha2 = x[5*i+1];
775     alpha3 = x[5*i+2];
776     alpha4 = x[5*i+3];
777     alpha5 = x[5*i+4];
778     while (n-->0) {
779       y[5*(*idx)]   += alpha1*(*v);
780       y[5*(*idx)+1] += alpha2*(*v);
781       y[5*(*idx)+2] += alpha3*(*v);
782       y[5*(*idx)+3] += alpha4*(*v);
783       y[5*(*idx)+4] += alpha5*(*v);
784       idx++; v++;
785     }
786   }
787   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
788   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
789   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
790   PetscFunctionReturn(0);
791 }
792 
793 /* ------------------------------------------------------------------------------*/
794 #undef __FUNCT__
795 #define __FUNCT__ "MatMult_SeqMAIJ_6"
796 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
797 {
798   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
799   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
800   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
801   PetscErrorCode ierr;
802   PetscInt       m = b->AIJ->m,*idx,*ii;
803   PetscInt       n,i,jrow,j;
804 
805   PetscFunctionBegin;
806   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
807   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
808   idx  = a->j;
809   v    = a->a;
810   ii   = a->i;
811 
812   for (i=0; i<m; i++) {
813     jrow = ii[i];
814     n    = ii[i+1] - jrow;
815     sum1  = 0.0;
816     sum2  = 0.0;
817     sum3  = 0.0;
818     sum4  = 0.0;
819     sum5  = 0.0;
820     sum6  = 0.0;
821     for (j=0; j<n; j++) {
822       sum1 += v[jrow]*x[6*idx[jrow]];
823       sum2 += v[jrow]*x[6*idx[jrow]+1];
824       sum3 += v[jrow]*x[6*idx[jrow]+2];
825       sum4 += v[jrow]*x[6*idx[jrow]+3];
826       sum5 += v[jrow]*x[6*idx[jrow]+4];
827       sum6 += v[jrow]*x[6*idx[jrow]+5];
828       jrow++;
829      }
830     y[6*i]   = sum1;
831     y[6*i+1] = sum2;
832     y[6*i+2] = sum3;
833     y[6*i+3] = sum4;
834     y[6*i+4] = sum5;
835     y[6*i+5] = sum6;
836   }
837 
838   ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr);
839   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
840   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 #undef __FUNCT__
845 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
846 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
847 {
848   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
849   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
850   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
851   PetscErrorCode ierr;
852   PetscInt       m = b->AIJ->m,n,i,*idx;
853 
854   PetscFunctionBegin;
855   ierr = VecSet(yy,zero);CHKERRQ(ierr);
856   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
857   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
858 
859   for (i=0; i<m; i++) {
860     idx    = a->j + a->i[i] ;
861     v      = a->a + a->i[i] ;
862     n      = a->i[i+1] - a->i[i];
863     alpha1 = x[6*i];
864     alpha2 = x[6*i+1];
865     alpha3 = x[6*i+2];
866     alpha4 = x[6*i+3];
867     alpha5 = x[6*i+4];
868     alpha6 = x[6*i+5];
869     while (n-->0) {
870       y[6*(*idx)]   += alpha1*(*v);
871       y[6*(*idx)+1] += alpha2*(*v);
872       y[6*(*idx)+2] += alpha3*(*v);
873       y[6*(*idx)+3] += alpha4*(*v);
874       y[6*(*idx)+4] += alpha5*(*v);
875       y[6*(*idx)+5] += alpha6*(*v);
876       idx++; v++;
877     }
878   }
879   ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->n);CHKERRQ(ierr);
880   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
881   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
882   PetscFunctionReturn(0);
883 }
884 
885 #undef __FUNCT__
886 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
887 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
888 {
889   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
890   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
891   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
892   PetscErrorCode ierr;
893   PetscInt       m = b->AIJ->m,*idx,*ii;
894   PetscInt       n,i,jrow,j;
895 
896   PetscFunctionBegin;
897   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
898   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
899   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
900   idx  = a->j;
901   v    = a->a;
902   ii   = a->i;
903 
904   for (i=0; i<m; i++) {
905     jrow = ii[i];
906     n    = ii[i+1] - jrow;
907     sum1  = 0.0;
908     sum2  = 0.0;
909     sum3  = 0.0;
910     sum4  = 0.0;
911     sum5  = 0.0;
912     sum6  = 0.0;
913     for (j=0; j<n; j++) {
914       sum1 += v[jrow]*x[6*idx[jrow]];
915       sum2 += v[jrow]*x[6*idx[jrow]+1];
916       sum3 += v[jrow]*x[6*idx[jrow]+2];
917       sum4 += v[jrow]*x[6*idx[jrow]+3];
918       sum5 += v[jrow]*x[6*idx[jrow]+4];
919       sum6 += v[jrow]*x[6*idx[jrow]+5];
920       jrow++;
921      }
922     y[6*i]   += sum1;
923     y[6*i+1] += sum2;
924     y[6*i+2] += sum3;
925     y[6*i+3] += sum4;
926     y[6*i+4] += sum5;
927     y[6*i+5] += sum6;
928   }
929 
930   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
931   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
932   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
938 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
939 {
940   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
941   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
942   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
943   PetscErrorCode ierr;
944   PetscInt       m = b->AIJ->m,n,i,*idx;
945 
946   PetscFunctionBegin;
947   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
950 
951   for (i=0; i<m; i++) {
952     idx    = a->j + a->i[i] ;
953     v      = a->a + a->i[i] ;
954     n      = a->i[i+1] - a->i[i];
955     alpha1 = x[6*i];
956     alpha2 = x[6*i+1];
957     alpha3 = x[6*i+2];
958     alpha4 = x[6*i+3];
959     alpha5 = x[6*i+4];
960     alpha6 = x[6*i+5];
961     while (n-->0) {
962       y[6*(*idx)]   += alpha1*(*v);
963       y[6*(*idx)+1] += alpha2*(*v);
964       y[6*(*idx)+2] += alpha3*(*v);
965       y[6*(*idx)+3] += alpha4*(*v);
966       y[6*(*idx)+4] += alpha5*(*v);
967       y[6*(*idx)+5] += alpha6*(*v);
968       idx++; v++;
969     }
970   }
971   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
972   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
973   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 /* ------------------------------------------------------------------------------*/
978 #undef __FUNCT__
979 #define __FUNCT__ "MatMult_SeqMAIJ_7"
980 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
981 {
982   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
983   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
984   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
985   PetscErrorCode ierr;
986   PetscInt       m = b->AIJ->m,*idx,*ii;
987   PetscInt       n,i,jrow,j;
988 
989   PetscFunctionBegin;
990   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
991   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
992   idx  = a->j;
993   v    = a->a;
994   ii   = a->i;
995 
996   for (i=0; i<m; i++) {
997     jrow = ii[i];
998     n    = ii[i+1] - jrow;
999     sum1  = 0.0;
1000     sum2  = 0.0;
1001     sum3  = 0.0;
1002     sum4  = 0.0;
1003     sum5  = 0.0;
1004     sum6  = 0.0;
1005     sum7  = 0.0;
1006     for (j=0; j<n; j++) {
1007       sum1 += v[jrow]*x[7*idx[jrow]];
1008       sum2 += v[jrow]*x[7*idx[jrow]+1];
1009       sum3 += v[jrow]*x[7*idx[jrow]+2];
1010       sum4 += v[jrow]*x[7*idx[jrow]+3];
1011       sum5 += v[jrow]*x[7*idx[jrow]+4];
1012       sum6 += v[jrow]*x[7*idx[jrow]+5];
1013       sum7 += v[jrow]*x[7*idx[jrow]+6];
1014       jrow++;
1015      }
1016     y[7*i]   = sum1;
1017     y[7*i+1] = sum2;
1018     y[7*i+2] = sum3;
1019     y[7*i+3] = sum4;
1020     y[7*i+4] = sum5;
1021     y[7*i+5] = sum6;
1022     y[7*i+6] = sum7;
1023   }
1024 
1025   ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr);
1026   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1027   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1033 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1034 {
1035   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1036   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1037   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1038   PetscErrorCode ierr;
1039   PetscInt       m = b->AIJ->m,n,i,*idx;
1040 
1041   PetscFunctionBegin;
1042   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1043   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1044   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1045 
1046   for (i=0; i<m; i++) {
1047     idx    = a->j + a->i[i] ;
1048     v      = a->a + a->i[i] ;
1049     n      = a->i[i+1] - a->i[i];
1050     alpha1 = x[7*i];
1051     alpha2 = x[7*i+1];
1052     alpha3 = x[7*i+2];
1053     alpha4 = x[7*i+3];
1054     alpha5 = x[7*i+4];
1055     alpha6 = x[7*i+5];
1056     alpha7 = x[7*i+6];
1057     while (n-->0) {
1058       y[7*(*idx)]   += alpha1*(*v);
1059       y[7*(*idx)+1] += alpha2*(*v);
1060       y[7*(*idx)+2] += alpha3*(*v);
1061       y[7*(*idx)+3] += alpha4*(*v);
1062       y[7*(*idx)+4] += alpha5*(*v);
1063       y[7*(*idx)+5] += alpha6*(*v);
1064       y[7*(*idx)+6] += alpha7*(*v);
1065       idx++; v++;
1066     }
1067   }
1068   ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->n);CHKERRQ(ierr);
1069   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1070   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1076 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1077 {
1078   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1079   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1080   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1081   PetscErrorCode ierr;
1082   PetscInt       m = b->AIJ->m,*idx,*ii;
1083   PetscInt       n,i,jrow,j;
1084 
1085   PetscFunctionBegin;
1086   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1087   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1088   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1089   idx  = a->j;
1090   v    = a->a;
1091   ii   = a->i;
1092 
1093   for (i=0; i<m; i++) {
1094     jrow = ii[i];
1095     n    = ii[i+1] - jrow;
1096     sum1  = 0.0;
1097     sum2  = 0.0;
1098     sum3  = 0.0;
1099     sum4  = 0.0;
1100     sum5  = 0.0;
1101     sum6  = 0.0;
1102     sum7  = 0.0;
1103     for (j=0; j<n; j++) {
1104       sum1 += v[jrow]*x[7*idx[jrow]];
1105       sum2 += v[jrow]*x[7*idx[jrow]+1];
1106       sum3 += v[jrow]*x[7*idx[jrow]+2];
1107       sum4 += v[jrow]*x[7*idx[jrow]+3];
1108       sum5 += v[jrow]*x[7*idx[jrow]+4];
1109       sum6 += v[jrow]*x[7*idx[jrow]+5];
1110       sum7 += v[jrow]*x[7*idx[jrow]+6];
1111       jrow++;
1112      }
1113     y[7*i]   += sum1;
1114     y[7*i+1] += sum2;
1115     y[7*i+2] += sum3;
1116     y[7*i+3] += sum4;
1117     y[7*i+4] += sum5;
1118     y[7*i+5] += sum6;
1119     y[7*i+6] += sum7;
1120   }
1121 
1122   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1123   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1124   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1130 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1133   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1134   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1135   PetscErrorCode ierr;
1136   PetscInt       m = b->AIJ->m,n,i,*idx;
1137 
1138   PetscFunctionBegin;
1139   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1141   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1142   for (i=0; i<m; i++) {
1143     idx    = a->j + a->i[i] ;
1144     v      = a->a + a->i[i] ;
1145     n      = a->i[i+1] - a->i[i];
1146     alpha1 = x[7*i];
1147     alpha2 = x[7*i+1];
1148     alpha3 = x[7*i+2];
1149     alpha4 = x[7*i+3];
1150     alpha5 = x[7*i+4];
1151     alpha6 = x[7*i+5];
1152     alpha7 = x[7*i+6];
1153     while (n-->0) {
1154       y[7*(*idx)]   += alpha1*(*v);
1155       y[7*(*idx)+1] += alpha2*(*v);
1156       y[7*(*idx)+2] += alpha3*(*v);
1157       y[7*(*idx)+3] += alpha4*(*v);
1158       y[7*(*idx)+4] += alpha5*(*v);
1159       y[7*(*idx)+5] += alpha6*(*v);
1160       y[7*(*idx)+6] += alpha7*(*v);
1161       idx++; v++;
1162     }
1163   }
1164   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1165   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1166   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1172 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1173 {
1174   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1175   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1176   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1177   PetscErrorCode ierr;
1178   PetscInt       m = b->AIJ->m,*idx,*ii;
1179   PetscInt       n,i,jrow,j;
1180 
1181   PetscFunctionBegin;
1182   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1183   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1184   idx  = a->j;
1185   v    = a->a;
1186   ii   = a->i;
1187 
1188   for (i=0; i<m; i++) {
1189     jrow = ii[i];
1190     n    = ii[i+1] - jrow;
1191     sum1  = 0.0;
1192     sum2  = 0.0;
1193     sum3  = 0.0;
1194     sum4  = 0.0;
1195     sum5  = 0.0;
1196     sum6  = 0.0;
1197     sum7  = 0.0;
1198     sum8  = 0.0;
1199     for (j=0; j<n; j++) {
1200       sum1 += v[jrow]*x[8*idx[jrow]];
1201       sum2 += v[jrow]*x[8*idx[jrow]+1];
1202       sum3 += v[jrow]*x[8*idx[jrow]+2];
1203       sum4 += v[jrow]*x[8*idx[jrow]+3];
1204       sum5 += v[jrow]*x[8*idx[jrow]+4];
1205       sum6 += v[jrow]*x[8*idx[jrow]+5];
1206       sum7 += v[jrow]*x[8*idx[jrow]+6];
1207       sum8 += v[jrow]*x[8*idx[jrow]+7];
1208       jrow++;
1209      }
1210     y[8*i]   = sum1;
1211     y[8*i+1] = sum2;
1212     y[8*i+2] = sum3;
1213     y[8*i+3] = sum4;
1214     y[8*i+4] = sum5;
1215     y[8*i+5] = sum6;
1216     y[8*i+6] = sum7;
1217     y[8*i+7] = sum8;
1218   }
1219 
1220   ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr);
1221   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1222   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1228 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1229 {
1230   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1231   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1232   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1233   PetscErrorCode ierr;
1234   PetscInt       m = b->AIJ->m,n,i,*idx;
1235 
1236   PetscFunctionBegin;
1237   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1238   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1239   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1240 
1241   for (i=0; i<m; i++) {
1242     idx    = a->j + a->i[i] ;
1243     v      = a->a + a->i[i] ;
1244     n      = a->i[i+1] - a->i[i];
1245     alpha1 = x[8*i];
1246     alpha2 = x[8*i+1];
1247     alpha3 = x[8*i+2];
1248     alpha4 = x[8*i+3];
1249     alpha5 = x[8*i+4];
1250     alpha6 = x[8*i+5];
1251     alpha7 = x[8*i+6];
1252     alpha8 = x[8*i+7];
1253     while (n-->0) {
1254       y[8*(*idx)]   += alpha1*(*v);
1255       y[8*(*idx)+1] += alpha2*(*v);
1256       y[8*(*idx)+2] += alpha3*(*v);
1257       y[8*(*idx)+3] += alpha4*(*v);
1258       y[8*(*idx)+4] += alpha5*(*v);
1259       y[8*(*idx)+5] += alpha6*(*v);
1260       y[8*(*idx)+6] += alpha7*(*v);
1261       y[8*(*idx)+7] += alpha8*(*v);
1262       idx++; v++;
1263     }
1264   }
1265   ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->n);CHKERRQ(ierr);
1266   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1267   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1273 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1274 {
1275   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1276   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1277   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1278   PetscErrorCode ierr;
1279   PetscInt       m = b->AIJ->m,*idx,*ii;
1280   PetscInt       n,i,jrow,j;
1281 
1282   PetscFunctionBegin;
1283   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1284   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1285   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1286   idx  = a->j;
1287   v    = a->a;
1288   ii   = a->i;
1289 
1290   for (i=0; i<m; i++) {
1291     jrow = ii[i];
1292     n    = ii[i+1] - jrow;
1293     sum1  = 0.0;
1294     sum2  = 0.0;
1295     sum3  = 0.0;
1296     sum4  = 0.0;
1297     sum5  = 0.0;
1298     sum6  = 0.0;
1299     sum7  = 0.0;
1300     sum8  = 0.0;
1301     for (j=0; j<n; j++) {
1302       sum1 += v[jrow]*x[8*idx[jrow]];
1303       sum2 += v[jrow]*x[8*idx[jrow]+1];
1304       sum3 += v[jrow]*x[8*idx[jrow]+2];
1305       sum4 += v[jrow]*x[8*idx[jrow]+3];
1306       sum5 += v[jrow]*x[8*idx[jrow]+4];
1307       sum6 += v[jrow]*x[8*idx[jrow]+5];
1308       sum7 += v[jrow]*x[8*idx[jrow]+6];
1309       sum8 += v[jrow]*x[8*idx[jrow]+7];
1310       jrow++;
1311      }
1312     y[8*i]   += sum1;
1313     y[8*i+1] += sum2;
1314     y[8*i+2] += sum3;
1315     y[8*i+3] += sum4;
1316     y[8*i+4] += sum5;
1317     y[8*i+5] += sum6;
1318     y[8*i+6] += sum7;
1319     y[8*i+7] += sum8;
1320   }
1321 
1322   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1324   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1330 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1331 {
1332   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1333   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1334   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1335   PetscErrorCode ierr;
1336   PetscInt       m = b->AIJ->m,n,i,*idx;
1337 
1338   PetscFunctionBegin;
1339   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1340   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1341   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1342   for (i=0; i<m; i++) {
1343     idx    = a->j + a->i[i] ;
1344     v      = a->a + a->i[i] ;
1345     n      = a->i[i+1] - a->i[i];
1346     alpha1 = x[8*i];
1347     alpha2 = x[8*i+1];
1348     alpha3 = x[8*i+2];
1349     alpha4 = x[8*i+3];
1350     alpha5 = x[8*i+4];
1351     alpha6 = x[8*i+5];
1352     alpha7 = x[8*i+6];
1353     alpha8 = x[8*i+7];
1354     while (n-->0) {
1355       y[8*(*idx)]   += alpha1*(*v);
1356       y[8*(*idx)+1] += alpha2*(*v);
1357       y[8*(*idx)+2] += alpha3*(*v);
1358       y[8*(*idx)+3] += alpha4*(*v);
1359       y[8*(*idx)+4] += alpha5*(*v);
1360       y[8*(*idx)+5] += alpha6*(*v);
1361       y[8*(*idx)+6] += alpha7*(*v);
1362       y[8*(*idx)+7] += alpha8*(*v);
1363       idx++; v++;
1364     }
1365   }
1366   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1367   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1368   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 /* ------------------------------------------------------------------------------*/
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1375 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1376 {
1377   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1378   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1379   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1380   PetscErrorCode ierr;
1381   PetscInt       m = b->AIJ->m,*idx,*ii;
1382   PetscInt       n,i,jrow,j;
1383 
1384   PetscFunctionBegin;
1385   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1386   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1387   idx  = a->j;
1388   v    = a->a;
1389   ii   = a->i;
1390 
1391   for (i=0; i<m; i++) {
1392     jrow = ii[i];
1393     n    = ii[i+1] - jrow;
1394     sum1  = 0.0;
1395     sum2  = 0.0;
1396     sum3  = 0.0;
1397     sum4  = 0.0;
1398     sum5  = 0.0;
1399     sum6  = 0.0;
1400     sum7  = 0.0;
1401     sum8  = 0.0;
1402     sum9  = 0.0;
1403     for (j=0; j<n; j++) {
1404       sum1 += v[jrow]*x[9*idx[jrow]];
1405       sum2 += v[jrow]*x[9*idx[jrow]+1];
1406       sum3 += v[jrow]*x[9*idx[jrow]+2];
1407       sum4 += v[jrow]*x[9*idx[jrow]+3];
1408       sum5 += v[jrow]*x[9*idx[jrow]+4];
1409       sum6 += v[jrow]*x[9*idx[jrow]+5];
1410       sum7 += v[jrow]*x[9*idx[jrow]+6];
1411       sum8 += v[jrow]*x[9*idx[jrow]+7];
1412       sum9 += v[jrow]*x[9*idx[jrow]+8];
1413       jrow++;
1414      }
1415     y[9*i]   = sum1;
1416     y[9*i+1] = sum2;
1417     y[9*i+2] = sum3;
1418     y[9*i+3] = sum4;
1419     y[9*i+4] = sum5;
1420     y[9*i+5] = sum6;
1421     y[9*i+6] = sum7;
1422     y[9*i+7] = sum8;
1423     y[9*i+8] = sum9;
1424   }
1425 
1426   ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr);
1427   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1428   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 /* ------------------------------------------------------------------------------*/
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1436 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1437 {
1438   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1439   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1440   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1441   PetscErrorCode ierr;
1442   PetscInt       m = b->AIJ->m,n,i,*idx;
1443 
1444   PetscFunctionBegin;
1445   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1446   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1447   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1448 
1449   for (i=0; i<m; i++) {
1450     idx    = a->j + a->i[i] ;
1451     v      = a->a + a->i[i] ;
1452     n      = a->i[i+1] - a->i[i];
1453     alpha1 = x[9*i];
1454     alpha2 = x[9*i+1];
1455     alpha3 = x[9*i+2];
1456     alpha4 = x[9*i+3];
1457     alpha5 = x[9*i+4];
1458     alpha6 = x[9*i+5];
1459     alpha7 = x[9*i+6];
1460     alpha8 = x[9*i+7];
1461     alpha9 = x[9*i+8];
1462     while (n-->0) {
1463       y[9*(*idx)]   += alpha1*(*v);
1464       y[9*(*idx)+1] += alpha2*(*v);
1465       y[9*(*idx)+2] += alpha3*(*v);
1466       y[9*(*idx)+3] += alpha4*(*v);
1467       y[9*(*idx)+4] += alpha5*(*v);
1468       y[9*(*idx)+5] += alpha6*(*v);
1469       y[9*(*idx)+6] += alpha7*(*v);
1470       y[9*(*idx)+7] += alpha8*(*v);
1471       y[9*(*idx)+8] += alpha9*(*v);
1472       idx++; v++;
1473     }
1474   }
1475   ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->n);CHKERRQ(ierr);
1476   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1477   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1483 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1484 {
1485   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1486   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1487   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1488   PetscErrorCode ierr;
1489   PetscInt       m = b->AIJ->m,*idx,*ii;
1490   PetscInt       n,i,jrow,j;
1491 
1492   PetscFunctionBegin;
1493   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1494   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1495   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1496   idx  = a->j;
1497   v    = a->a;
1498   ii   = a->i;
1499 
1500   for (i=0; i<m; i++) {
1501     jrow = ii[i];
1502     n    = ii[i+1] - jrow;
1503     sum1  = 0.0;
1504     sum2  = 0.0;
1505     sum3  = 0.0;
1506     sum4  = 0.0;
1507     sum5  = 0.0;
1508     sum6  = 0.0;
1509     sum7  = 0.0;
1510     sum8  = 0.0;
1511     sum9  = 0.0;
1512     for (j=0; j<n; j++) {
1513       sum1 += v[jrow]*x[9*idx[jrow]];
1514       sum2 += v[jrow]*x[9*idx[jrow]+1];
1515       sum3 += v[jrow]*x[9*idx[jrow]+2];
1516       sum4 += v[jrow]*x[9*idx[jrow]+3];
1517       sum5 += v[jrow]*x[9*idx[jrow]+4];
1518       sum6 += v[jrow]*x[9*idx[jrow]+5];
1519       sum7 += v[jrow]*x[9*idx[jrow]+6];
1520       sum8 += v[jrow]*x[9*idx[jrow]+7];
1521       sum9 += v[jrow]*x[9*idx[jrow]+8];
1522       jrow++;
1523      }
1524     y[9*i]   += sum1;
1525     y[9*i+1] += sum2;
1526     y[9*i+2] += sum3;
1527     y[9*i+3] += sum4;
1528     y[9*i+4] += sum5;
1529     y[9*i+5] += sum6;
1530     y[9*i+6] += sum7;
1531     y[9*i+7] += sum8;
1532     y[9*i+8] += sum9;
1533   }
1534 
1535   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1536   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1543 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1544 {
1545   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1546   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1547   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1548   PetscErrorCode ierr;
1549   PetscInt       m = b->AIJ->m,n,i,*idx;
1550 
1551   PetscFunctionBegin;
1552   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1553   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1554   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1555   for (i=0; i<m; i++) {
1556     idx    = a->j + a->i[i] ;
1557     v      = a->a + a->i[i] ;
1558     n      = a->i[i+1] - a->i[i];
1559     alpha1 = x[9*i];
1560     alpha2 = x[9*i+1];
1561     alpha3 = x[9*i+2];
1562     alpha4 = x[9*i+3];
1563     alpha5 = x[9*i+4];
1564     alpha6 = x[9*i+5];
1565     alpha7 = x[9*i+6];
1566     alpha8 = x[9*i+7];
1567     alpha9 = x[9*i+8];
1568     while (n-->0) {
1569       y[9*(*idx)]   += alpha1*(*v);
1570       y[9*(*idx)+1] += alpha2*(*v);
1571       y[9*(*idx)+2] += alpha3*(*v);
1572       y[9*(*idx)+3] += alpha4*(*v);
1573       y[9*(*idx)+4] += alpha5*(*v);
1574       y[9*(*idx)+5] += alpha6*(*v);
1575       y[9*(*idx)+6] += alpha7*(*v);
1576       y[9*(*idx)+7] += alpha8*(*v);
1577       y[9*(*idx)+8] += alpha9*(*v);
1578       idx++; v++;
1579     }
1580   }
1581   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1582   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1583   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 /*--------------------------------------------------------------------------------------------*/
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1589 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1590 {
1591   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1592   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1593   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1594   PetscErrorCode ierr;
1595   PetscInt       m = b->AIJ->m,*idx,*ii;
1596   PetscInt       n,i,jrow,j;
1597 
1598   PetscFunctionBegin;
1599   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1600   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1601   idx  = a->j;
1602   v    = a->a;
1603   ii   = a->i;
1604 
1605   for (i=0; i<m; i++) {
1606     jrow = ii[i];
1607     n    = ii[i+1] - jrow;
1608     sum1  = 0.0;
1609     sum2  = 0.0;
1610     sum3  = 0.0;
1611     sum4  = 0.0;
1612     sum5  = 0.0;
1613     sum6  = 0.0;
1614     sum7  = 0.0;
1615     sum8  = 0.0;
1616     sum9  = 0.0;
1617     sum10 = 0.0;
1618     for (j=0; j<n; j++) {
1619       sum1  += v[jrow]*x[10*idx[jrow]];
1620       sum2  += v[jrow]*x[10*idx[jrow]+1];
1621       sum3  += v[jrow]*x[10*idx[jrow]+2];
1622       sum4  += v[jrow]*x[10*idx[jrow]+3];
1623       sum5  += v[jrow]*x[10*idx[jrow]+4];
1624       sum6  += v[jrow]*x[10*idx[jrow]+5];
1625       sum7  += v[jrow]*x[10*idx[jrow]+6];
1626       sum8  += v[jrow]*x[10*idx[jrow]+7];
1627       sum9  += v[jrow]*x[10*idx[jrow]+8];
1628       sum10 += v[jrow]*x[10*idx[jrow]+9];
1629       jrow++;
1630      }
1631     y[10*i]   = sum1;
1632     y[10*i+1] = sum2;
1633     y[10*i+2] = sum3;
1634     y[10*i+3] = sum4;
1635     y[10*i+4] = sum5;
1636     y[10*i+5] = sum6;
1637     y[10*i+6] = sum7;
1638     y[10*i+7] = sum8;
1639     y[10*i+8] = sum9;
1640     y[10*i+9] = sum10;
1641   }
1642 
1643   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1644   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1645   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1651 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1652 {
1653   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1654   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1655   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1656   PetscErrorCode ierr;
1657   PetscInt       m = b->AIJ->m,*idx,*ii;
1658   PetscInt       n,i,jrow,j;
1659 
1660   PetscFunctionBegin;
1661   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1662   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1663   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1664   idx  = a->j;
1665   v    = a->a;
1666   ii   = a->i;
1667 
1668   for (i=0; i<m; i++) {
1669     jrow = ii[i];
1670     n    = ii[i+1] - jrow;
1671     sum1  = 0.0;
1672     sum2  = 0.0;
1673     sum3  = 0.0;
1674     sum4  = 0.0;
1675     sum5  = 0.0;
1676     sum6  = 0.0;
1677     sum7  = 0.0;
1678     sum8  = 0.0;
1679     sum9  = 0.0;
1680     sum10 = 0.0;
1681     for (j=0; j<n; j++) {
1682       sum1  += v[jrow]*x[10*idx[jrow]];
1683       sum2  += v[jrow]*x[10*idx[jrow]+1];
1684       sum3  += v[jrow]*x[10*idx[jrow]+2];
1685       sum4  += v[jrow]*x[10*idx[jrow]+3];
1686       sum5  += v[jrow]*x[10*idx[jrow]+4];
1687       sum6  += v[jrow]*x[10*idx[jrow]+5];
1688       sum7  += v[jrow]*x[10*idx[jrow]+6];
1689       sum8  += v[jrow]*x[10*idx[jrow]+7];
1690       sum9  += v[jrow]*x[10*idx[jrow]+8];
1691       sum10 += v[jrow]*x[10*idx[jrow]+9];
1692       jrow++;
1693      }
1694     y[10*i]   += sum1;
1695     y[10*i+1] += sum2;
1696     y[10*i+2] += sum3;
1697     y[10*i+3] += sum4;
1698     y[10*i+4] += sum5;
1699     y[10*i+5] += sum6;
1700     y[10*i+6] += sum7;
1701     y[10*i+7] += sum8;
1702     y[10*i+8] += sum9;
1703     y[10*i+9] += sum10;
1704   }
1705 
1706   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1707   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1708   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1714 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1715 {
1716   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1717   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1718   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1719   PetscErrorCode ierr;
1720   PetscInt       m = b->AIJ->m,n,i,*idx;
1721 
1722   PetscFunctionBegin;
1723   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1724   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1725   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1726 
1727   for (i=0; i<m; i++) {
1728     idx    = a->j + a->i[i] ;
1729     v      = a->a + a->i[i] ;
1730     n      = a->i[i+1] - a->i[i];
1731     alpha1 = x[9*i];
1732     alpha2 = x[9*i+1];
1733     alpha3 = x[9*i+2];
1734     alpha4 = x[9*i+3];
1735     alpha5 = x[9*i+4];
1736     alpha6 = x[9*i+5];
1737     alpha7 = x[9*i+6];
1738     alpha8 = x[9*i+7];
1739     alpha9 = x[9*i+8];
1740     alpha10 = x[10*i+9];
1741     while (n-->0) {
1742       y[9*(*idx)]   += alpha1*(*v);
1743       y[9*(*idx)+1] += alpha2*(*v);
1744       y[9*(*idx)+2] += alpha3*(*v);
1745       y[9*(*idx)+3] += alpha4*(*v);
1746       y[9*(*idx)+4] += alpha5*(*v);
1747       y[9*(*idx)+5] += alpha6*(*v);
1748       y[9*(*idx)+6] += alpha7*(*v);
1749       y[9*(*idx)+7] += alpha8*(*v);
1750       y[9*(*idx)+8] += alpha9*(*v);
1751       y[10*(*idx)+9] += alpha10*(*v);
1752       idx++; v++;
1753     }
1754   }
1755   ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->n);CHKERRQ(ierr);
1756   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1757   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1763 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1764 {
1765   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1766   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1767   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1768   PetscErrorCode ierr;
1769   PetscInt       m = b->AIJ->m,n,i,*idx;
1770 
1771   PetscFunctionBegin;
1772   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1773   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1774   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1775   for (i=0; i<m; i++) {
1776     idx    = a->j + a->i[i] ;
1777     v      = a->a + a->i[i] ;
1778     n      = a->i[i+1] - a->i[i];
1779     alpha1 = x[10*i];
1780     alpha2 = x[10*i+1];
1781     alpha3 = x[10*i+2];
1782     alpha4 = x[10*i+3];
1783     alpha5 = x[10*i+4];
1784     alpha6 = x[10*i+5];
1785     alpha7 = x[10*i+6];
1786     alpha8 = x[10*i+7];
1787     alpha9 = x[10*i+8];
1788     alpha10 = x[10*i+9];
1789     while (n-->0) {
1790       y[10*(*idx)]   += alpha1*(*v);
1791       y[10*(*idx)+1] += alpha2*(*v);
1792       y[10*(*idx)+2] += alpha3*(*v);
1793       y[10*(*idx)+3] += alpha4*(*v);
1794       y[10*(*idx)+4] += alpha5*(*v);
1795       y[10*(*idx)+5] += alpha6*(*v);
1796       y[10*(*idx)+6] += alpha7*(*v);
1797       y[10*(*idx)+7] += alpha8*(*v);
1798       y[10*(*idx)+8] += alpha9*(*v);
1799       y[10*(*idx)+9] += alpha10*(*v);
1800       idx++; v++;
1801     }
1802   }
1803   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1804   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1805   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 
1810 /*--------------------------------------------------------------------------------------------*/
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1813 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1814 {
1815   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1816   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1817   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1818   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1819   PetscErrorCode ierr;
1820   PetscInt       m = b->AIJ->m,*idx,*ii;
1821   PetscInt       n,i,jrow,j;
1822 
1823   PetscFunctionBegin;
1824   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1825   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1826   idx  = a->j;
1827   v    = a->a;
1828   ii   = a->i;
1829 
1830   for (i=0; i<m; i++) {
1831     jrow = ii[i];
1832     n    = ii[i+1] - jrow;
1833     sum1  = 0.0;
1834     sum2  = 0.0;
1835     sum3  = 0.0;
1836     sum4  = 0.0;
1837     sum5  = 0.0;
1838     sum6  = 0.0;
1839     sum7  = 0.0;
1840     sum8  = 0.0;
1841     sum9  = 0.0;
1842     sum10 = 0.0;
1843     sum11 = 0.0;
1844     sum12 = 0.0;
1845     sum13 = 0.0;
1846     sum14 = 0.0;
1847     sum15 = 0.0;
1848     sum16 = 0.0;
1849     for (j=0; j<n; j++) {
1850       sum1  += v[jrow]*x[16*idx[jrow]];
1851       sum2  += v[jrow]*x[16*idx[jrow]+1];
1852       sum3  += v[jrow]*x[16*idx[jrow]+2];
1853       sum4  += v[jrow]*x[16*idx[jrow]+3];
1854       sum5  += v[jrow]*x[16*idx[jrow]+4];
1855       sum6  += v[jrow]*x[16*idx[jrow]+5];
1856       sum7  += v[jrow]*x[16*idx[jrow]+6];
1857       sum8  += v[jrow]*x[16*idx[jrow]+7];
1858       sum9  += v[jrow]*x[16*idx[jrow]+8];
1859       sum10 += v[jrow]*x[16*idx[jrow]+9];
1860       sum11 += v[jrow]*x[16*idx[jrow]+10];
1861       sum12 += v[jrow]*x[16*idx[jrow]+11];
1862       sum13 += v[jrow]*x[16*idx[jrow]+12];
1863       sum14 += v[jrow]*x[16*idx[jrow]+13];
1864       sum15 += v[jrow]*x[16*idx[jrow]+14];
1865       sum16 += v[jrow]*x[16*idx[jrow]+15];
1866       jrow++;
1867      }
1868     y[16*i]    = sum1;
1869     y[16*i+1]  = sum2;
1870     y[16*i+2]  = sum3;
1871     y[16*i+3]  = sum4;
1872     y[16*i+4]  = sum5;
1873     y[16*i+5]  = sum6;
1874     y[16*i+6]  = sum7;
1875     y[16*i+7]  = sum8;
1876     y[16*i+8]  = sum9;
1877     y[16*i+9]  = sum10;
1878     y[16*i+10] = sum11;
1879     y[16*i+11] = sum12;
1880     y[16*i+12] = sum13;
1881     y[16*i+13] = sum14;
1882     y[16*i+14] = sum15;
1883     y[16*i+15] = sum16;
1884   }
1885 
1886   ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr);
1887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1888   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1894 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1895 {
1896   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1897   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1898   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1899   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1900   PetscErrorCode ierr;
1901   PetscInt       m = b->AIJ->m,n,i,*idx;
1902 
1903   PetscFunctionBegin;
1904   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1905   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1906   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1907 
1908   for (i=0; i<m; i++) {
1909     idx    = a->j + a->i[i] ;
1910     v      = a->a + a->i[i] ;
1911     n      = a->i[i+1] - a->i[i];
1912     alpha1  = x[16*i];
1913     alpha2  = x[16*i+1];
1914     alpha3  = x[16*i+2];
1915     alpha4  = x[16*i+3];
1916     alpha5  = x[16*i+4];
1917     alpha6  = x[16*i+5];
1918     alpha7  = x[16*i+6];
1919     alpha8  = x[16*i+7];
1920     alpha9  = x[16*i+8];
1921     alpha10 = x[16*i+9];
1922     alpha11 = x[16*i+10];
1923     alpha12 = x[16*i+11];
1924     alpha13 = x[16*i+12];
1925     alpha14 = x[16*i+13];
1926     alpha15 = x[16*i+14];
1927     alpha16 = x[16*i+15];
1928     while (n-->0) {
1929       y[16*(*idx)]    += alpha1*(*v);
1930       y[16*(*idx)+1]  += alpha2*(*v);
1931       y[16*(*idx)+2]  += alpha3*(*v);
1932       y[16*(*idx)+3]  += alpha4*(*v);
1933       y[16*(*idx)+4]  += alpha5*(*v);
1934       y[16*(*idx)+5]  += alpha6*(*v);
1935       y[16*(*idx)+6]  += alpha7*(*v);
1936       y[16*(*idx)+7]  += alpha8*(*v);
1937       y[16*(*idx)+8]  += alpha9*(*v);
1938       y[16*(*idx)+9]  += alpha10*(*v);
1939       y[16*(*idx)+10] += alpha11*(*v);
1940       y[16*(*idx)+11] += alpha12*(*v);
1941       y[16*(*idx)+12] += alpha13*(*v);
1942       y[16*(*idx)+13] += alpha14*(*v);
1943       y[16*(*idx)+14] += alpha15*(*v);
1944       y[16*(*idx)+15] += alpha16*(*v);
1945       idx++; v++;
1946     }
1947   }
1948   ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->n);CHKERRQ(ierr);
1949   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1950   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1956 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1957 {
1958   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1959   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1960   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1961   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1962   PetscErrorCode ierr;
1963   PetscInt       m = b->AIJ->m,*idx,*ii;
1964   PetscInt       n,i,jrow,j;
1965 
1966   PetscFunctionBegin;
1967   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1968   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1969   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1970   idx  = a->j;
1971   v    = a->a;
1972   ii   = a->i;
1973 
1974   for (i=0; i<m; i++) {
1975     jrow = ii[i];
1976     n    = ii[i+1] - jrow;
1977     sum1  = 0.0;
1978     sum2  = 0.0;
1979     sum3  = 0.0;
1980     sum4  = 0.0;
1981     sum5  = 0.0;
1982     sum6  = 0.0;
1983     sum7  = 0.0;
1984     sum8  = 0.0;
1985     sum9  = 0.0;
1986     sum10 = 0.0;
1987     sum11 = 0.0;
1988     sum12 = 0.0;
1989     sum13 = 0.0;
1990     sum14 = 0.0;
1991     sum15 = 0.0;
1992     sum16 = 0.0;
1993     for (j=0; j<n; j++) {
1994       sum1  += v[jrow]*x[16*idx[jrow]];
1995       sum2  += v[jrow]*x[16*idx[jrow]+1];
1996       sum3  += v[jrow]*x[16*idx[jrow]+2];
1997       sum4  += v[jrow]*x[16*idx[jrow]+3];
1998       sum5  += v[jrow]*x[16*idx[jrow]+4];
1999       sum6  += v[jrow]*x[16*idx[jrow]+5];
2000       sum7  += v[jrow]*x[16*idx[jrow]+6];
2001       sum8  += v[jrow]*x[16*idx[jrow]+7];
2002       sum9  += v[jrow]*x[16*idx[jrow]+8];
2003       sum10 += v[jrow]*x[16*idx[jrow]+9];
2004       sum11 += v[jrow]*x[16*idx[jrow]+10];
2005       sum12 += v[jrow]*x[16*idx[jrow]+11];
2006       sum13 += v[jrow]*x[16*idx[jrow]+12];
2007       sum14 += v[jrow]*x[16*idx[jrow]+13];
2008       sum15 += v[jrow]*x[16*idx[jrow]+14];
2009       sum16 += v[jrow]*x[16*idx[jrow]+15];
2010       jrow++;
2011      }
2012     y[16*i]    += sum1;
2013     y[16*i+1]  += sum2;
2014     y[16*i+2]  += sum3;
2015     y[16*i+3]  += sum4;
2016     y[16*i+4]  += sum5;
2017     y[16*i+5]  += sum6;
2018     y[16*i+6]  += sum7;
2019     y[16*i+7]  += sum8;
2020     y[16*i+8]  += sum9;
2021     y[16*i+9]  += sum10;
2022     y[16*i+10] += sum11;
2023     y[16*i+11] += sum12;
2024     y[16*i+12] += sum13;
2025     y[16*i+13] += sum14;
2026     y[16*i+14] += sum15;
2027     y[16*i+15] += sum16;
2028   }
2029 
2030   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2031   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2032   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2038 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2039 {
2040   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2041   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2042   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2043   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2044   PetscErrorCode ierr;
2045   PetscInt       m = b->AIJ->m,n,i,*idx;
2046 
2047   PetscFunctionBegin;
2048   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2049   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2050   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2051   for (i=0; i<m; i++) {
2052     idx    = a->j + a->i[i] ;
2053     v      = a->a + a->i[i] ;
2054     n      = a->i[i+1] - a->i[i];
2055     alpha1 = x[16*i];
2056     alpha2 = x[16*i+1];
2057     alpha3 = x[16*i+2];
2058     alpha4 = x[16*i+3];
2059     alpha5 = x[16*i+4];
2060     alpha6 = x[16*i+5];
2061     alpha7 = x[16*i+6];
2062     alpha8 = x[16*i+7];
2063     alpha9  = x[16*i+8];
2064     alpha10 = x[16*i+9];
2065     alpha11 = x[16*i+10];
2066     alpha12 = x[16*i+11];
2067     alpha13 = x[16*i+12];
2068     alpha14 = x[16*i+13];
2069     alpha15 = x[16*i+14];
2070     alpha16 = x[16*i+15];
2071     while (n-->0) {
2072       y[16*(*idx)]   += alpha1*(*v);
2073       y[16*(*idx)+1] += alpha2*(*v);
2074       y[16*(*idx)+2] += alpha3*(*v);
2075       y[16*(*idx)+3] += alpha4*(*v);
2076       y[16*(*idx)+4] += alpha5*(*v);
2077       y[16*(*idx)+5] += alpha6*(*v);
2078       y[16*(*idx)+6] += alpha7*(*v);
2079       y[16*(*idx)+7] += alpha8*(*v);
2080       y[16*(*idx)+8]  += alpha9*(*v);
2081       y[16*(*idx)+9]  += alpha10*(*v);
2082       y[16*(*idx)+10] += alpha11*(*v);
2083       y[16*(*idx)+11] += alpha12*(*v);
2084       y[16*(*idx)+12] += alpha13*(*v);
2085       y[16*(*idx)+13] += alpha14*(*v);
2086       y[16*(*idx)+14] += alpha15*(*v);
2087       y[16*(*idx)+15] += alpha16*(*v);
2088       idx++; v++;
2089     }
2090   }
2091   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2093   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 /*===================================================================================*/
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2100 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2101 {
2102   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2103   PetscErrorCode ierr;
2104 
2105   PetscFunctionBegin;
2106   /* start the scatter */
2107   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2108   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2109   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2110   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 #undef __FUNCT__
2115 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2116 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2117 {
2118   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2119   PetscErrorCode ierr;
2120 
2121   PetscFunctionBegin;
2122   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2123   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2124   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2125   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2131 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2132 {
2133   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   /* start the scatter */
2138   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2139   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2140   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2141   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2147 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2148 {
2149   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2154   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2155   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2156   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2162 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2163 {
2164   /* This routine requires testing -- but it's getting better. */
2165   PetscErrorCode     ierr;
2166   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2167   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2168   Mat                P=pp->AIJ;
2169   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2170   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2171   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2172   PetscInt           an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn;
2173   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2174   MatScalar          *ca;
2175 
2176   PetscFunctionBegin;
2177   /* Start timer */
2178   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2179 
2180   /* Get ij structure of P^T */
2181   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2182 
2183   cn = pn*ppdof;
2184   /* Allocate ci array, arrays for fill computation and */
2185   /* free space for accumulating nonzero column info */
2186   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2187   ci[0] = 0;
2188 
2189   /* Work arrays for rows of P^T*A */
2190   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
2191   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
2192   ptasparserow = ptadenserow  + an;
2193   denserow     = ptasparserow + an;
2194   sparserow    = denserow     + cn;
2195 
2196   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2197   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2198   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2199   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2200   current_space = free_space;
2201 
2202   /* Determine symbolic info for each row of C: */
2203   for (i=0;i<pn;i++) {
2204     ptnzi  = pti[i+1] - pti[i];
2205     ptJ    = ptj + pti[i];
2206     for (dof=0;dof<ppdof;dof++) {
2207       ptanzi = 0;
2208       /* Determine symbolic row of PtA: */
2209       for (j=0;j<ptnzi;j++) {
2210         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2211         arow = ptJ[j]*ppdof + dof;
2212         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2213         anzj = ai[arow+1] - ai[arow];
2214         ajj  = aj + ai[arow];
2215         for (k=0;k<anzj;k++) {
2216           if (!ptadenserow[ajj[k]]) {
2217             ptadenserow[ajj[k]]    = -1;
2218             ptasparserow[ptanzi++] = ajj[k];
2219           }
2220         }
2221       }
2222       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2223       ptaj = ptasparserow;
2224       cnzi   = 0;
2225       for (j=0;j<ptanzi;j++) {
2226         /* Get offset within block of P */
2227         pshift = *ptaj%ppdof;
2228         /* Get block row of P */
2229         prow = (*ptaj++)/ppdof; /* integer division */
2230         /* P has same number of nonzeros per row as the compressed form */
2231         pnzj = pi[prow+1] - pi[prow];
2232         pjj  = pj + pi[prow];
2233         for (k=0;k<pnzj;k++) {
2234           /* Locations in C are shifted by the offset within the block */
2235           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2236           if (!denserow[pjj[k]*ppdof+pshift]) {
2237             denserow[pjj[k]*ppdof+pshift] = -1;
2238             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2239           }
2240         }
2241       }
2242 
2243       /* sort sparserow */
2244       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2245 
2246       /* If free space is not available, make more free space */
2247       /* Double the amount of total space in the list */
2248       if (current_space->local_remaining<cnzi) {
2249         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
2250       }
2251 
2252       /* Copy data into free space, and zero out denserows */
2253       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2254       current_space->array           += cnzi;
2255       current_space->local_used      += cnzi;
2256       current_space->local_remaining -= cnzi;
2257 
2258       for (j=0;j<ptanzi;j++) {
2259         ptadenserow[ptasparserow[j]] = 0;
2260       }
2261       for (j=0;j<cnzi;j++) {
2262         denserow[sparserow[j]] = 0;
2263       }
2264       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2265       /*        For now, we will recompute what is needed. */
2266       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2267     }
2268   }
2269   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2270   /* Allocate space for cj, initialize cj, and */
2271   /* destroy list of free space and other temporary array(s) */
2272   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2273   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2274   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
2275 
2276   /* Allocate space for ca */
2277   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
2278   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
2279 
2280   /* put together the new matrix */
2281   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2282 
2283   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2284   /* Since these are PETSc arrays, change flags to free them as necessary. */
2285   c = (Mat_SeqAIJ *)((*C)->data);
2286   c->freedata = PETSC_TRUE;
2287   c->nonew    = 0;
2288 
2289   /* Clean up. */
2290   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2291 
2292   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
2298 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2299 {
2300   /* This routine requires testing -- first draft only */
2301   PetscErrorCode ierr;
2302   PetscInt       flops=0;
2303   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2304   Mat            P=pp->AIJ;
2305   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2306   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2307   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2308   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2309   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2310   PetscInt       am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof;
2311   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2312   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2313 
2314   PetscFunctionBegin;
2315   /* Allocate temporary array for storage of one row of A*P */
2316   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
2317   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
2318 
2319   apj      = (PetscInt *)(apa + cn);
2320   apjdense = apj + cn;
2321 
2322   /* Clear old values in C */
2323   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
2324 
2325   for (i=0;i<am;i++) {
2326     /* Form sparse row of A*P */
2327     anzi  = ai[i+1] - ai[i];
2328     apnzj = 0;
2329     for (j=0;j<anzi;j++) {
2330       /* Get offset within block of P */
2331       pshift = *aj%ppdof;
2332       /* Get block row of P */
2333       prow   = *aj++/ppdof; /* integer division */
2334       pnzj = pi[prow+1] - pi[prow];
2335       pjj  = pj + pi[prow];
2336       paj  = pa + pi[prow];
2337       for (k=0;k<pnzj;k++) {
2338         poffset = pjj[k]*ppdof+pshift;
2339         if (!apjdense[poffset]) {
2340           apjdense[poffset] = -1;
2341           apj[apnzj++]      = poffset;
2342         }
2343         apa[poffset] += (*aa)*paj[k];
2344       }
2345       flops += 2*pnzj;
2346       aa++;
2347     }
2348 
2349     /* Sort the j index array for quick sparse axpy. */
2350     /* Note: a array does not need sorting as it is in dense storage locations. */
2351     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
2352 
2353     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2354     prow    = i/ppdof; /* integer division */
2355     pshift  = i%ppdof;
2356     poffset = pi[prow];
2357     pnzi = pi[prow+1] - poffset;
2358     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2359     pJ   = pj+poffset;
2360     pA   = pa+poffset;
2361     for (j=0;j<pnzi;j++) {
2362       crow   = (*pJ)*ppdof+pshift;
2363       cjj    = cj + ci[crow];
2364       caj    = ca + ci[crow];
2365       pJ++;
2366       /* Perform sparse axpy operation.  Note cjj includes apj. */
2367       for (k=0,nextap=0;nextap<apnzj;k++) {
2368         if (cjj[k]==apj[nextap]) {
2369           caj[k] += (*pA)*apa[apj[nextap++]];
2370         }
2371       }
2372       flops += 2*apnzj;
2373       pA++;
2374     }
2375 
2376     /* Zero the current row info for A*P */
2377     for (j=0;j<apnzj;j++) {
2378       apa[apj[j]]      = 0.;
2379       apjdense[apj[j]] = 0;
2380     }
2381   }
2382 
2383   /* Assemble the final matrix and clean up */
2384   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2385   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2386   ierr = PetscFree(apa);CHKERRQ(ierr);
2387   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
2388 
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 EXTERN_C_BEGIN
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2395 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2396 {
2397   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2398   Mat               a = b->AIJ,B;
2399   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2400   PetscErrorCode    ierr;
2401   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2402   PetscInt          *cols;
2403   PetscScalar       *vals;
2404 
2405   PetscFunctionBegin;
2406   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
2407   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
2408   for (i=0; i<m; i++) {
2409     nmax = PetscMax(nmax,aij->ilen[i]);
2410     for (j=0; j<dof; j++) {
2411       ilen[dof*i+j] = aij->ilen[i];
2412     }
2413   }
2414   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2415   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2416   ierr = PetscFree(ilen);CHKERRQ(ierr);
2417   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
2418   ii   = 0;
2419   for (i=0; i<m; i++) {
2420     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2421     for (j=0; j<dof; j++) {
2422       for (k=0; k<ncols; k++) {
2423         icols[k] = dof*cols[k]+j;
2424       }
2425       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2426       ii++;
2427     }
2428     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2429   }
2430   ierr = PetscFree(icols);CHKERRQ(ierr);
2431   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2432   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2433 
2434   if (reuse == MAT_REUSE_MATRIX) {
2435     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2436   } else {
2437     *newmat = B;
2438   }
2439   PetscFunctionReturn(0);
2440 }
2441 EXTERN_C_END
2442 
2443 #include "src/mat/impls/aij/mpi/mpiaij.h"
2444 
2445 EXTERN_C_BEGIN
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2448 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2449 {
2450   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2451   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2452   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2453   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2454   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2455   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2456   PetscInt          dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
2457   PetscInt          rstart,cstart,*garray,ii,k;
2458   PetscErrorCode    ierr;
2459   PetscScalar       *vals,*ovals;
2460 
2461   PetscFunctionBegin;
2462   ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr);
2463   for (i=0; i<A->m/dof; i++) {
2464     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2465     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2466     for (j=0; j<dof; j++) {
2467       dnz[dof*i+j] = AIJ->ilen[i];
2468       onz[dof*i+j] = OAIJ->ilen[i];
2469     }
2470   }
2471   ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2472   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2473   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2474 
2475   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2476   rstart = dof*mpiaij->rstart;
2477   cstart = dof*mpiaij->cstart;
2478   garray = mpiaij->garray;
2479 
2480   ii = rstart;
2481   for (i=0; i<A->m/dof; i++) {
2482     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2483     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2484     for (j=0; j<dof; j++) {
2485       for (k=0; k<ncols; k++) {
2486         icols[k] = cstart + dof*cols[k]+j;
2487       }
2488       for (k=0; k<oncols; k++) {
2489         oicols[k] = dof*garray[ocols[k]]+j;
2490       }
2491       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2492       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2493       ii++;
2494     }
2495     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2496     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2497   }
2498   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2499 
2500   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2501   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502 
2503   if (reuse == MAT_REUSE_MATRIX) {
2504     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2505   } else {
2506     *newmat = B;
2507   }
2508   PetscFunctionReturn(0);
2509 }
2510 EXTERN_C_END
2511 
2512 
2513 /* ---------------------------------------------------------------------------------- */
2514 /*MC
2515   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2516   operations for multicomponent problems.  It interpolates each component the same
2517   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2518   and MATMPIAIJ for distributed matrices.
2519 
2520   Operations provided:
2521 + MatMult
2522 . MatMultTranspose
2523 . MatMultAdd
2524 . MatMultTransposeAdd
2525 - MatView
2526 
2527   Level: advanced
2528 
2529 M*/
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatCreateMAIJ"
2532 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2533 {
2534   PetscErrorCode ierr;
2535   PetscMPIInt    size;
2536   PetscInt       n;
2537   Mat_MPIMAIJ    *b;
2538   Mat            B;
2539 
2540   PetscFunctionBegin;
2541   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2542 
2543   if (dof == 1) {
2544     *maij = A;
2545   } else {
2546     ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
2547     ierr = MatSetSizes(B,dof*A->m,dof*A->n,dof*A->M,dof*A->N);CHKERRQ(ierr);
2548     B->assembled    = PETSC_TRUE;
2549 
2550     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2551     if (size == 1) {
2552       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2553       B->ops->destroy = MatDestroy_SeqMAIJ;
2554       B->ops->view    = MatView_SeqMAIJ;
2555       b      = (Mat_MPIMAIJ*)B->data;
2556       b->dof = dof;
2557       b->AIJ = A;
2558       if (dof == 2) {
2559         B->ops->mult             = MatMult_SeqMAIJ_2;
2560         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2561         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2562         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2563       } else if (dof == 3) {
2564         B->ops->mult             = MatMult_SeqMAIJ_3;
2565         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2566         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2567         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2568       } else if (dof == 4) {
2569         B->ops->mult             = MatMult_SeqMAIJ_4;
2570         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2571         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2572         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2573       } else if (dof == 5) {
2574         B->ops->mult             = MatMult_SeqMAIJ_5;
2575         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2576         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2577         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2578       } else if (dof == 6) {
2579         B->ops->mult             = MatMult_SeqMAIJ_6;
2580         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2581         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2582         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2583       } else if (dof == 7) {
2584         B->ops->mult             = MatMult_SeqMAIJ_7;
2585         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2586         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2587         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2588       } else if (dof == 8) {
2589         B->ops->mult             = MatMult_SeqMAIJ_8;
2590         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2591         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2592         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2593       } else if (dof == 9) {
2594         B->ops->mult             = MatMult_SeqMAIJ_9;
2595         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2596         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2597         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2598       } else if (dof == 10) {
2599         B->ops->mult             = MatMult_SeqMAIJ_10;
2600         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2601         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2602         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2603       } else if (dof == 16) {
2604         B->ops->mult             = MatMult_SeqMAIJ_16;
2605         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2606         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2607         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2608       } else {
2609         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2610       }
2611       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2612       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2613       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2614     } else {
2615       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2616       IS         from,to;
2617       Vec        gvec;
2618       PetscInt   *garray,i;
2619 
2620       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2621       B->ops->destroy = MatDestroy_MPIMAIJ;
2622       B->ops->view    = MatView_MPIMAIJ;
2623       b      = (Mat_MPIMAIJ*)B->data;
2624       b->dof = dof;
2625       b->A   = A;
2626       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
2627       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
2628 
2629       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2630       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
2631       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2632 
2633       /* create two temporary Index sets for build scatter gather */
2634       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2635       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2636       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2637       ierr = PetscFree(garray);CHKERRQ(ierr);
2638       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2639 
2640       /* create temporary global vector to generate scatter context */
2641       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
2642       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2643 
2644       /* generate the scatter context */
2645       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2646 
2647       ierr = ISDestroy(from);CHKERRQ(ierr);
2648       ierr = ISDestroy(to);CHKERRQ(ierr);
2649       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2650 
2651       B->ops->mult             = MatMult_MPIMAIJ_dof;
2652       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2653       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2654       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2655       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2656     }
2657     *maij = B;
2658     ierr = MatView_Private(B);CHKERRQ(ierr);
2659   }
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 
2664 
2665 
2666 
2667 
2668 
2669 
2670 
2671 
2672 
2673 
2674