xref: /petsc/src/mat/impls/maij/maij.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "src/mat/impls/maij/maij.h"
21 #include "src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27 {
28   PetscErrorCode ierr;
29   PetscTruth     ismpimaij,isseqmaij;
30 
31   PetscFunctionBegin;
32   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34   if (ismpimaij) {
35     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36 
37     *B = b->A;
38   } else if (isseqmaij) {
39     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40 
41     *B = b->AIJ;
42   } else {
43     *B = A;
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMAIJRedimension"
50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51 {
52   PetscErrorCode ierr;
53   Mat            Aij;
54 
55   PetscFunctionBegin;
56   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqMAIJ"
63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
67 
68   PetscFunctionBegin;
69   if (b->AIJ) {
70     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
71   }
72   ierr = PetscFree(b);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatView_SeqMAIJ"
78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79 {
80   PetscErrorCode ierr;
81   Mat            B;
82 
83   PetscFunctionBegin;
84   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85   ierr = MatView(B,viewer);CHKERRQ(ierr);
86   ierr = MatDestroy(B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatView_MPIMAIJ"
92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93 {
94   PetscErrorCode ierr;
95   Mat            B;
96 
97   PetscFunctionBegin;
98   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99   ierr = MatView(B,viewer);CHKERRQ(ierr);
100   ierr = MatDestroy(B);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatDestroy_MPIMAIJ"
106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107 {
108   PetscErrorCode ierr;
109   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
110 
111   PetscFunctionBegin;
112   if (b->AIJ) {
113     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
114   }
115   if (b->OAIJ) {
116     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117   }
118   if (b->A) {
119     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120   }
121   if (b->ctx) {
122     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123   }
124   if (b->w) {
125     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126   }
127   ierr = PetscFree(b);CHKERRQ(ierr);
128   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 /*MC
133   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134   multicomponent problems, interpolating or restricting each component the same way independently.
135   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
136 
137   Operations provided:
138 . MatMult
139 . MatMultTranspose
140 . MatMultAdd
141 . MatMultTransposeAdd
142 
143   Level: advanced
144 
145 .seealso: MatCreateSeqDense
146 M*/
147 
148 EXTERN_C_BEGIN
149 #undef __FUNCT__
150 #define __FUNCT__ "MatCreate_MAIJ"
151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
152 {
153   PetscErrorCode ierr;
154   Mat_MPIMAIJ    *b;
155   PetscMPIInt    size;
156 
157   PetscFunctionBegin;
158   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
159   A->data  = (void*)b;
160   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
161   A->factor           = 0;
162   A->mapping          = 0;
163 
164   b->AIJ  = 0;
165   b->dof  = 0;
166   b->OAIJ = 0;
167   b->ctx  = 0;
168   b->w    = 0;
169   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
170   if (size == 1){
171     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
172   } else {
173     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
174   }
175   PetscFunctionReturn(0);
176 }
177 EXTERN_C_END
178 
179 /* --------------------------------------------------------------------------------------*/
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMult_SeqMAIJ_2"
182 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183 {
184   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186   PetscScalar    *x,*y,*v,sum1, sum2;
187   PetscErrorCode ierr;
188   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
189   PetscInt       n,i,jrow,j;
190 
191   PetscFunctionBegin;
192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
193   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
194   idx  = a->j;
195   v    = a->a;
196   ii   = a->i;
197 
198   for (i=0; i<m; i++) {
199     jrow = ii[i];
200     n    = ii[i+1] - jrow;
201     sum1  = 0.0;
202     sum2  = 0.0;
203     nonzerorow += (n>0);
204     for (j=0; j<n; j++) {
205       sum1 += v[jrow]*x[2*idx[jrow]];
206       sum2 += v[jrow]*x[2*idx[jrow]+1];
207       jrow++;
208      }
209     y[2*i]   = sum1;
210     y[2*i+1] = sum2;
211   }
212 
213   ierr = PetscLogFlops(4*a->nz - 2*nonzerorow);CHKERRQ(ierr);
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
221 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
222 {
223   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
224   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
225   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
226   PetscErrorCode ierr;
227   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
228 
229   PetscFunctionBegin;
230   ierr = VecSet(yy,zero);CHKERRQ(ierr);
231   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
232   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
233 
234   for (i=0; i<m; i++) {
235     idx    = a->j + a->i[i] ;
236     v      = a->a + a->i[i] ;
237     n      = a->i[i+1] - a->i[i];
238     alpha1 = x[2*i];
239     alpha2 = x[2*i+1];
240     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
241   }
242   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
243   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
250 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
251 {
252   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
253   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
254   PetscScalar    *x,*y,*v,sum1, sum2;
255   PetscErrorCode ierr;
256   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
257   PetscInt       n,i,jrow,j;
258 
259   PetscFunctionBegin;
260   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
261   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
262   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
263   idx  = a->j;
264   v    = a->a;
265   ii   = a->i;
266 
267   for (i=0; i<m; i++) {
268     jrow = ii[i];
269     n    = ii[i+1] - jrow;
270     sum1  = 0.0;
271     sum2  = 0.0;
272     for (j=0; j<n; j++) {
273       sum1 += v[jrow]*x[2*idx[jrow]];
274       sum2 += v[jrow]*x[2*idx[jrow]+1];
275       jrow++;
276      }
277     y[2*i]   += sum1;
278     y[2*i+1] += sum2;
279   }
280 
281   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
282   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
283   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 #undef __FUNCT__
287 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
288 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
289 {
290   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
291   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
292   PetscScalar    *x,*y,*v,alpha1,alpha2;
293   PetscErrorCode ierr;
294   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
295 
296   PetscFunctionBegin;
297   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
298   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
299   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
300 
301   for (i=0; i<m; i++) {
302     idx   = a->j + a->i[i] ;
303     v     = a->a + a->i[i] ;
304     n     = a->i[i+1] - a->i[i];
305     alpha1 = x[2*i];
306     alpha2 = x[2*i+1];
307     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
308   }
309   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
310   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
311   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 /* --------------------------------------------------------------------------------------*/
315 #undef __FUNCT__
316 #define __FUNCT__ "MatMult_SeqMAIJ_3"
317 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
318 {
319   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
320   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
321   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
322   PetscErrorCode ierr;
323   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
324   PetscInt       n,i,jrow,j;
325 
326   PetscFunctionBegin;
327   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
328   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
329   idx  = a->j;
330   v    = a->a;
331   ii   = a->i;
332 
333   for (i=0; i<m; i++) {
334     jrow = ii[i];
335     n    = ii[i+1] - jrow;
336     sum1  = 0.0;
337     sum2  = 0.0;
338     sum3  = 0.0;
339     nonzerorow += (n>0);
340     for (j=0; j<n; j++) {
341       sum1 += v[jrow]*x[3*idx[jrow]];
342       sum2 += v[jrow]*x[3*idx[jrow]+1];
343       sum3 += v[jrow]*x[3*idx[jrow]+2];
344       jrow++;
345      }
346     y[3*i]   = sum1;
347     y[3*i+1] = sum2;
348     y[3*i+2] = sum3;
349   }
350 
351   ierr = PetscLogFlops(6*a->nz - 3*nonzerorow);CHKERRQ(ierr);
352   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
353   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
359 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360 {
361   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
362   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
363   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
364   PetscErrorCode ierr;
365   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
366 
367   PetscFunctionBegin;
368   ierr = VecSet(yy,zero);CHKERRQ(ierr);
369   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
370   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
371 
372   for (i=0; i<m; i++) {
373     idx    = a->j + a->i[i];
374     v      = a->a + a->i[i];
375     n      = a->i[i+1] - a->i[i];
376     alpha1 = x[3*i];
377     alpha2 = x[3*i+1];
378     alpha3 = x[3*i+2];
379     while (n-->0) {
380       y[3*(*idx)]   += alpha1*(*v);
381       y[3*(*idx)+1] += alpha2*(*v);
382       y[3*(*idx)+2] += alpha3*(*v);
383       idx++; v++;
384     }
385   }
386   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
387   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
388   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
394 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
395 {
396   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
397   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
398   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
399   PetscErrorCode ierr;
400   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
401   PetscInt       n,i,jrow,j;
402 
403   PetscFunctionBegin;
404   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
405   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
406   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
407   idx  = a->j;
408   v    = a->a;
409   ii   = a->i;
410 
411   for (i=0; i<m; i++) {
412     jrow = ii[i];
413     n    = ii[i+1] - jrow;
414     sum1  = 0.0;
415     sum2  = 0.0;
416     sum3  = 0.0;
417     for (j=0; j<n; j++) {
418       sum1 += v[jrow]*x[3*idx[jrow]];
419       sum2 += v[jrow]*x[3*idx[jrow]+1];
420       sum3 += v[jrow]*x[3*idx[jrow]+2];
421       jrow++;
422      }
423     y[3*i]   += sum1;
424     y[3*i+1] += sum2;
425     y[3*i+2] += sum3;
426   }
427 
428   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
429   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
430   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 #undef __FUNCT__
434 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
435 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436 {
437   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
438   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
439   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
440   PetscErrorCode ierr;
441   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
442 
443   PetscFunctionBegin;
444   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
445   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
446   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
447   for (i=0; i<m; i++) {
448     idx    = a->j + a->i[i] ;
449     v      = a->a + a->i[i] ;
450     n      = a->i[i+1] - a->i[i];
451     alpha1 = x[3*i];
452     alpha2 = x[3*i+1];
453     alpha3 = x[3*i+2];
454     while (n-->0) {
455       y[3*(*idx)]   += alpha1*(*v);
456       y[3*(*idx)+1] += alpha2*(*v);
457       y[3*(*idx)+2] += alpha3*(*v);
458       idx++; v++;
459     }
460   }
461   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
462   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
463   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 /* ------------------------------------------------------------------------------*/
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_SeqMAIJ_4"
470 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
471 {
472   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
473   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
474   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
475   PetscErrorCode ierr;
476   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
477   PetscInt       n,i,jrow,j;
478 
479   PetscFunctionBegin;
480   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
481   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
482   idx  = a->j;
483   v    = a->a;
484   ii   = a->i;
485 
486   for (i=0; i<m; i++) {
487     jrow = ii[i];
488     n    = ii[i+1] - jrow;
489     sum1  = 0.0;
490     sum2  = 0.0;
491     sum3  = 0.0;
492     sum4  = 0.0;
493     nonzerorow += (n>0);
494     for (j=0; j<n; j++) {
495       sum1 += v[jrow]*x[4*idx[jrow]];
496       sum2 += v[jrow]*x[4*idx[jrow]+1];
497       sum3 += v[jrow]*x[4*idx[jrow]+2];
498       sum4 += v[jrow]*x[4*idx[jrow]+3];
499       jrow++;
500      }
501     y[4*i]   = sum1;
502     y[4*i+1] = sum2;
503     y[4*i+2] = sum3;
504     y[4*i+3] = sum4;
505   }
506 
507   ierr = PetscLogFlops(8*a->nz - 4*nonzerorow);CHKERRQ(ierr);
508   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
509   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
515 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516 {
517   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
518   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
519   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
520   PetscErrorCode ierr;
521   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
522 
523   PetscFunctionBegin;
524   ierr = VecSet(yy,zero);CHKERRQ(ierr);
525   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
526   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527   for (i=0; i<m; i++) {
528     idx    = a->j + a->i[i] ;
529     v      = a->a + a->i[i] ;
530     n      = a->i[i+1] - a->i[i];
531     alpha1 = x[4*i];
532     alpha2 = x[4*i+1];
533     alpha3 = x[4*i+2];
534     alpha4 = x[4*i+3];
535     while (n-->0) {
536       y[4*(*idx)]   += alpha1*(*v);
537       y[4*(*idx)+1] += alpha2*(*v);
538       y[4*(*idx)+2] += alpha3*(*v);
539       y[4*(*idx)+3] += alpha4*(*v);
540       idx++; v++;
541     }
542   }
543   ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr);
544   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
545   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
551 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
552 {
553   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
554   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
555   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
556   PetscErrorCode ierr;
557   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
558   PetscInt       n,i,jrow,j;
559 
560   PetscFunctionBegin;
561   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
562   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
564   idx  = a->j;
565   v    = a->a;
566   ii   = a->i;
567 
568   for (i=0; i<m; i++) {
569     jrow = ii[i];
570     n    = ii[i+1] - jrow;
571     sum1  = 0.0;
572     sum2  = 0.0;
573     sum3  = 0.0;
574     sum4  = 0.0;
575     for (j=0; j<n; j++) {
576       sum1 += v[jrow]*x[4*idx[jrow]];
577       sum2 += v[jrow]*x[4*idx[jrow]+1];
578       sum3 += v[jrow]*x[4*idx[jrow]+2];
579       sum4 += v[jrow]*x[4*idx[jrow]+3];
580       jrow++;
581      }
582     y[4*i]   += sum1;
583     y[4*i+1] += sum2;
584     y[4*i+2] += sum3;
585     y[4*i+3] += sum4;
586   }
587 
588   ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr);
589   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
590   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 #undef __FUNCT__
594 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
595 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
598   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
599   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
600   PetscErrorCode ierr;
601   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
602 
603   PetscFunctionBegin;
604   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
605   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
606   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
607 
608   for (i=0; i<m; i++) {
609     idx    = a->j + a->i[i] ;
610     v      = a->a + a->i[i] ;
611     n      = a->i[i+1] - a->i[i];
612     alpha1 = x[4*i];
613     alpha2 = x[4*i+1];
614     alpha3 = x[4*i+2];
615     alpha4 = x[4*i+3];
616     while (n-->0) {
617       y[4*(*idx)]   += alpha1*(*v);
618       y[4*(*idx)+1] += alpha2*(*v);
619       y[4*(*idx)+2] += alpha3*(*v);
620       y[4*(*idx)+3] += alpha4*(*v);
621       idx++; v++;
622     }
623   }
624   ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr);
625   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
626   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
627   PetscFunctionReturn(0);
628 }
629 /* ------------------------------------------------------------------------------*/
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatMult_SeqMAIJ_5"
633 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634 {
635   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
636   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
637   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
638   PetscErrorCode ierr;
639   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
640   PetscInt       n,i,jrow,j;
641 
642   PetscFunctionBegin;
643   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
644   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
645   idx  = a->j;
646   v    = a->a;
647   ii   = a->i;
648 
649   for (i=0; i<m; i++) {
650     jrow = ii[i];
651     n    = ii[i+1] - jrow;
652     sum1  = 0.0;
653     sum2  = 0.0;
654     sum3  = 0.0;
655     sum4  = 0.0;
656     sum5  = 0.0;
657     nonzerorow += (n>0);
658     for (j=0; j<n; j++) {
659       sum1 += v[jrow]*x[5*idx[jrow]];
660       sum2 += v[jrow]*x[5*idx[jrow]+1];
661       sum3 += v[jrow]*x[5*idx[jrow]+2];
662       sum4 += v[jrow]*x[5*idx[jrow]+3];
663       sum5 += v[jrow]*x[5*idx[jrow]+4];
664       jrow++;
665      }
666     y[5*i]   = sum1;
667     y[5*i+1] = sum2;
668     y[5*i+2] = sum3;
669     y[5*i+3] = sum4;
670     y[5*i+4] = sum5;
671   }
672 
673   ierr = PetscLogFlops(10*a->nz - 5*nonzerorow);CHKERRQ(ierr);
674   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
675   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
681 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
682 {
683   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
684   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
685   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
686   PetscErrorCode ierr;
687   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
688 
689   PetscFunctionBegin;
690   ierr = VecSet(yy,zero);CHKERRQ(ierr);
691   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
692   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
693 
694   for (i=0; i<m; i++) {
695     idx    = a->j + a->i[i] ;
696     v      = a->a + a->i[i] ;
697     n      = a->i[i+1] - a->i[i];
698     alpha1 = x[5*i];
699     alpha2 = x[5*i+1];
700     alpha3 = x[5*i+2];
701     alpha4 = x[5*i+3];
702     alpha5 = x[5*i+4];
703     while (n-->0) {
704       y[5*(*idx)]   += alpha1*(*v);
705       y[5*(*idx)+1] += alpha2*(*v);
706       y[5*(*idx)+2] += alpha3*(*v);
707       y[5*(*idx)+3] += alpha4*(*v);
708       y[5*(*idx)+4] += alpha5*(*v);
709       idx++; v++;
710     }
711   }
712   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
713   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
714   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
720 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
721 {
722   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
723   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
724   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
725   PetscErrorCode ierr;
726   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
727   PetscInt       n,i,jrow,j;
728 
729   PetscFunctionBegin;
730   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
731   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
732   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
733   idx  = a->j;
734   v    = a->a;
735   ii   = a->i;
736 
737   for (i=0; i<m; i++) {
738     jrow = ii[i];
739     n    = ii[i+1] - jrow;
740     sum1  = 0.0;
741     sum2  = 0.0;
742     sum3  = 0.0;
743     sum4  = 0.0;
744     sum5  = 0.0;
745     for (j=0; j<n; j++) {
746       sum1 += v[jrow]*x[5*idx[jrow]];
747       sum2 += v[jrow]*x[5*idx[jrow]+1];
748       sum3 += v[jrow]*x[5*idx[jrow]+2];
749       sum4 += v[jrow]*x[5*idx[jrow]+3];
750       sum5 += v[jrow]*x[5*idx[jrow]+4];
751       jrow++;
752      }
753     y[5*i]   += sum1;
754     y[5*i+1] += sum2;
755     y[5*i+2] += sum3;
756     y[5*i+3] += sum4;
757     y[5*i+4] += sum5;
758   }
759 
760   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
761   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
762   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
768 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
769 {
770   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
771   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
772   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
773   PetscErrorCode ierr;
774   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
775 
776   PetscFunctionBegin;
777   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
778   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
779   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
780 
781   for (i=0; i<m; i++) {
782     idx    = a->j + a->i[i] ;
783     v      = a->a + a->i[i] ;
784     n      = a->i[i+1] - a->i[i];
785     alpha1 = x[5*i];
786     alpha2 = x[5*i+1];
787     alpha3 = x[5*i+2];
788     alpha4 = x[5*i+3];
789     alpha5 = x[5*i+4];
790     while (n-->0) {
791       y[5*(*idx)]   += alpha1*(*v);
792       y[5*(*idx)+1] += alpha2*(*v);
793       y[5*(*idx)+2] += alpha3*(*v);
794       y[5*(*idx)+3] += alpha4*(*v);
795       y[5*(*idx)+4] += alpha5*(*v);
796       idx++; v++;
797     }
798   }
799   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
800   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
801   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
802   PetscFunctionReturn(0);
803 }
804 
805 /* ------------------------------------------------------------------------------*/
806 #undef __FUNCT__
807 #define __FUNCT__ "MatMult_SeqMAIJ_6"
808 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
809 {
810   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
811   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
812   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
813   PetscErrorCode ierr;
814   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
815   PetscInt       n,i,jrow,j;
816 
817   PetscFunctionBegin;
818   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
820   idx  = a->j;
821   v    = a->a;
822   ii   = a->i;
823 
824   for (i=0; i<m; i++) {
825     jrow = ii[i];
826     n    = ii[i+1] - jrow;
827     sum1  = 0.0;
828     sum2  = 0.0;
829     sum3  = 0.0;
830     sum4  = 0.0;
831     sum5  = 0.0;
832     sum6  = 0.0;
833     nonzerorow += (n>0);
834     for (j=0; j<n; j++) {
835       sum1 += v[jrow]*x[6*idx[jrow]];
836       sum2 += v[jrow]*x[6*idx[jrow]+1];
837       sum3 += v[jrow]*x[6*idx[jrow]+2];
838       sum4 += v[jrow]*x[6*idx[jrow]+3];
839       sum5 += v[jrow]*x[6*idx[jrow]+4];
840       sum6 += v[jrow]*x[6*idx[jrow]+5];
841       jrow++;
842      }
843     y[6*i]   = sum1;
844     y[6*i+1] = sum2;
845     y[6*i+2] = sum3;
846     y[6*i+3] = sum4;
847     y[6*i+4] = sum5;
848     y[6*i+5] = sum6;
849   }
850 
851   ierr = PetscLogFlops(12*a->nz - 6*nonzerorow);CHKERRQ(ierr);
852   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
853   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 #undef __FUNCT__
858 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
859 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
860 {
861   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
862   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
863   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
864   PetscErrorCode ierr;
865   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
866 
867   PetscFunctionBegin;
868   ierr = VecSet(yy,zero);CHKERRQ(ierr);
869   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
870   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
871 
872   for (i=0; i<m; i++) {
873     idx    = a->j + a->i[i] ;
874     v      = a->a + a->i[i] ;
875     n      = a->i[i+1] - a->i[i];
876     alpha1 = x[6*i];
877     alpha2 = x[6*i+1];
878     alpha3 = x[6*i+2];
879     alpha4 = x[6*i+3];
880     alpha5 = x[6*i+4];
881     alpha6 = x[6*i+5];
882     while (n-->0) {
883       y[6*(*idx)]   += alpha1*(*v);
884       y[6*(*idx)+1] += alpha2*(*v);
885       y[6*(*idx)+2] += alpha3*(*v);
886       y[6*(*idx)+3] += alpha4*(*v);
887       y[6*(*idx)+4] += alpha5*(*v);
888       y[6*(*idx)+5] += alpha6*(*v);
889       idx++; v++;
890     }
891   }
892   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
893   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
894   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
900 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
901 {
902   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
903   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
904   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
905   PetscErrorCode ierr;
906   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
907   PetscInt       n,i,jrow,j;
908 
909   PetscFunctionBegin;
910   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
911   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
912   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
913   idx  = a->j;
914   v    = a->a;
915   ii   = a->i;
916 
917   for (i=0; i<m; i++) {
918     jrow = ii[i];
919     n    = ii[i+1] - jrow;
920     sum1  = 0.0;
921     sum2  = 0.0;
922     sum3  = 0.0;
923     sum4  = 0.0;
924     sum5  = 0.0;
925     sum6  = 0.0;
926     for (j=0; j<n; j++) {
927       sum1 += v[jrow]*x[6*idx[jrow]];
928       sum2 += v[jrow]*x[6*idx[jrow]+1];
929       sum3 += v[jrow]*x[6*idx[jrow]+2];
930       sum4 += v[jrow]*x[6*idx[jrow]+3];
931       sum5 += v[jrow]*x[6*idx[jrow]+4];
932       sum6 += v[jrow]*x[6*idx[jrow]+5];
933       jrow++;
934      }
935     y[6*i]   += sum1;
936     y[6*i+1] += sum2;
937     y[6*i+2] += sum3;
938     y[6*i+3] += sum4;
939     y[6*i+4] += sum5;
940     y[6*i+5] += sum6;
941   }
942 
943   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
944   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
945   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
951 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
952 {
953   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
954   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
955   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
956   PetscErrorCode ierr;
957   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
958 
959   PetscFunctionBegin;
960   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
961   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
962   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
963 
964   for (i=0; i<m; i++) {
965     idx    = a->j + a->i[i] ;
966     v      = a->a + a->i[i] ;
967     n      = a->i[i+1] - a->i[i];
968     alpha1 = x[6*i];
969     alpha2 = x[6*i+1];
970     alpha3 = x[6*i+2];
971     alpha4 = x[6*i+3];
972     alpha5 = x[6*i+4];
973     alpha6 = x[6*i+5];
974     while (n-->0) {
975       y[6*(*idx)]   += alpha1*(*v);
976       y[6*(*idx)+1] += alpha2*(*v);
977       y[6*(*idx)+2] += alpha3*(*v);
978       y[6*(*idx)+3] += alpha4*(*v);
979       y[6*(*idx)+4] += alpha5*(*v);
980       y[6*(*idx)+5] += alpha6*(*v);
981       idx++; v++;
982     }
983   }
984   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
985   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
986   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 /* ------------------------------------------------------------------------------*/
991 #undef __FUNCT__
992 #define __FUNCT__ "MatMult_SeqMAIJ_7"
993 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
994 {
995   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
996   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
997   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
998   PetscErrorCode ierr;
999   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1000   PetscInt       n,i,jrow,j;
1001 
1002   PetscFunctionBegin;
1003   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1004   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1005   idx  = a->j;
1006   v    = a->a;
1007   ii   = a->i;
1008 
1009   for (i=0; i<m; i++) {
1010     jrow = ii[i];
1011     n    = ii[i+1] - jrow;
1012     sum1  = 0.0;
1013     sum2  = 0.0;
1014     sum3  = 0.0;
1015     sum4  = 0.0;
1016     sum5  = 0.0;
1017     sum6  = 0.0;
1018     sum7  = 0.0;
1019     nonzerorow += (n>0);
1020     for (j=0; j<n; j++) {
1021       sum1 += v[jrow]*x[7*idx[jrow]];
1022       sum2 += v[jrow]*x[7*idx[jrow]+1];
1023       sum3 += v[jrow]*x[7*idx[jrow]+2];
1024       sum4 += v[jrow]*x[7*idx[jrow]+3];
1025       sum5 += v[jrow]*x[7*idx[jrow]+4];
1026       sum6 += v[jrow]*x[7*idx[jrow]+5];
1027       sum7 += v[jrow]*x[7*idx[jrow]+6];
1028       jrow++;
1029      }
1030     y[7*i]   = sum1;
1031     y[7*i+1] = sum2;
1032     y[7*i+2] = sum3;
1033     y[7*i+3] = sum4;
1034     y[7*i+4] = sum5;
1035     y[7*i+5] = sum6;
1036     y[7*i+6] = sum7;
1037   }
1038 
1039   ierr = PetscLogFlops(14*a->nz - 7*nonzerorow);CHKERRQ(ierr);
1040   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1041   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1047 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1048 {
1049   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1050   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1051   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1052   PetscErrorCode ierr;
1053   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1054 
1055   PetscFunctionBegin;
1056   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1057   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1058   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1059 
1060   for (i=0; i<m; i++) {
1061     idx    = a->j + a->i[i] ;
1062     v      = a->a + a->i[i] ;
1063     n      = a->i[i+1] - a->i[i];
1064     alpha1 = x[7*i];
1065     alpha2 = x[7*i+1];
1066     alpha3 = x[7*i+2];
1067     alpha4 = x[7*i+3];
1068     alpha5 = x[7*i+4];
1069     alpha6 = x[7*i+5];
1070     alpha7 = x[7*i+6];
1071     while (n-->0) {
1072       y[7*(*idx)]   += alpha1*(*v);
1073       y[7*(*idx)+1] += alpha2*(*v);
1074       y[7*(*idx)+2] += alpha3*(*v);
1075       y[7*(*idx)+3] += alpha4*(*v);
1076       y[7*(*idx)+4] += alpha5*(*v);
1077       y[7*(*idx)+5] += alpha6*(*v);
1078       y[7*(*idx)+6] += alpha7*(*v);
1079       idx++; v++;
1080     }
1081   }
1082   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1083   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1084   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1090 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1091 {
1092   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1093   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1094   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1095   PetscErrorCode ierr;
1096   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1097   PetscInt       n,i,jrow,j;
1098 
1099   PetscFunctionBegin;
1100   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1101   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1102   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1103   idx  = a->j;
1104   v    = a->a;
1105   ii   = a->i;
1106 
1107   for (i=0; i<m; i++) {
1108     jrow = ii[i];
1109     n    = ii[i+1] - jrow;
1110     sum1  = 0.0;
1111     sum2  = 0.0;
1112     sum3  = 0.0;
1113     sum4  = 0.0;
1114     sum5  = 0.0;
1115     sum6  = 0.0;
1116     sum7  = 0.0;
1117     for (j=0; j<n; j++) {
1118       sum1 += v[jrow]*x[7*idx[jrow]];
1119       sum2 += v[jrow]*x[7*idx[jrow]+1];
1120       sum3 += v[jrow]*x[7*idx[jrow]+2];
1121       sum4 += v[jrow]*x[7*idx[jrow]+3];
1122       sum5 += v[jrow]*x[7*idx[jrow]+4];
1123       sum6 += v[jrow]*x[7*idx[jrow]+5];
1124       sum7 += v[jrow]*x[7*idx[jrow]+6];
1125       jrow++;
1126      }
1127     y[7*i]   += sum1;
1128     y[7*i+1] += sum2;
1129     y[7*i+2] += sum3;
1130     y[7*i+3] += sum4;
1131     y[7*i+4] += sum5;
1132     y[7*i+5] += sum6;
1133     y[7*i+6] += sum7;
1134   }
1135 
1136   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1137   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1138   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1144 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1145 {
1146   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1147   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1148   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1149   PetscErrorCode ierr;
1150   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1151 
1152   PetscFunctionBegin;
1153   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1155   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1156   for (i=0; i<m; i++) {
1157     idx    = a->j + a->i[i] ;
1158     v      = a->a + a->i[i] ;
1159     n      = a->i[i+1] - a->i[i];
1160     alpha1 = x[7*i];
1161     alpha2 = x[7*i+1];
1162     alpha3 = x[7*i+2];
1163     alpha4 = x[7*i+3];
1164     alpha5 = x[7*i+4];
1165     alpha6 = x[7*i+5];
1166     alpha7 = x[7*i+6];
1167     while (n-->0) {
1168       y[7*(*idx)]   += alpha1*(*v);
1169       y[7*(*idx)+1] += alpha2*(*v);
1170       y[7*(*idx)+2] += alpha3*(*v);
1171       y[7*(*idx)+3] += alpha4*(*v);
1172       y[7*(*idx)+4] += alpha5*(*v);
1173       y[7*(*idx)+5] += alpha6*(*v);
1174       y[7*(*idx)+6] += alpha7*(*v);
1175       idx++; v++;
1176     }
1177   }
1178   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1180   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1186 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1187 {
1188   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1189   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1190   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1191   PetscErrorCode ierr;
1192   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1193   PetscInt       n,i,jrow,j;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1197   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1198   idx  = a->j;
1199   v    = a->a;
1200   ii   = a->i;
1201 
1202   for (i=0; i<m; i++) {
1203     jrow = ii[i];
1204     n    = ii[i+1] - jrow;
1205     sum1  = 0.0;
1206     sum2  = 0.0;
1207     sum3  = 0.0;
1208     sum4  = 0.0;
1209     sum5  = 0.0;
1210     sum6  = 0.0;
1211     sum7  = 0.0;
1212     sum8  = 0.0;
1213     nonzerorow += (n>0);
1214     for (j=0; j<n; j++) {
1215       sum1 += v[jrow]*x[8*idx[jrow]];
1216       sum2 += v[jrow]*x[8*idx[jrow]+1];
1217       sum3 += v[jrow]*x[8*idx[jrow]+2];
1218       sum4 += v[jrow]*x[8*idx[jrow]+3];
1219       sum5 += v[jrow]*x[8*idx[jrow]+4];
1220       sum6 += v[jrow]*x[8*idx[jrow]+5];
1221       sum7 += v[jrow]*x[8*idx[jrow]+6];
1222       sum8 += v[jrow]*x[8*idx[jrow]+7];
1223       jrow++;
1224      }
1225     y[8*i]   = sum1;
1226     y[8*i+1] = sum2;
1227     y[8*i+2] = sum3;
1228     y[8*i+3] = sum4;
1229     y[8*i+4] = sum5;
1230     y[8*i+5] = sum6;
1231     y[8*i+6] = sum7;
1232     y[8*i+7] = sum8;
1233   }
1234 
1235   ierr = PetscLogFlops(16*a->nz - 8*nonzerorow);CHKERRQ(ierr);
1236   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1237   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1243 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1244 {
1245   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1246   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1247   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1248   PetscErrorCode ierr;
1249   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1250 
1251   PetscFunctionBegin;
1252   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1253   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1254   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1255 
1256   for (i=0; i<m; i++) {
1257     idx    = a->j + a->i[i] ;
1258     v      = a->a + a->i[i] ;
1259     n      = a->i[i+1] - a->i[i];
1260     alpha1 = x[8*i];
1261     alpha2 = x[8*i+1];
1262     alpha3 = x[8*i+2];
1263     alpha4 = x[8*i+3];
1264     alpha5 = x[8*i+4];
1265     alpha6 = x[8*i+5];
1266     alpha7 = x[8*i+6];
1267     alpha8 = x[8*i+7];
1268     while (n-->0) {
1269       y[8*(*idx)]   += alpha1*(*v);
1270       y[8*(*idx)+1] += alpha2*(*v);
1271       y[8*(*idx)+2] += alpha3*(*v);
1272       y[8*(*idx)+3] += alpha4*(*v);
1273       y[8*(*idx)+4] += alpha5*(*v);
1274       y[8*(*idx)+5] += alpha6*(*v);
1275       y[8*(*idx)+6] += alpha7*(*v);
1276       y[8*(*idx)+7] += alpha8*(*v);
1277       idx++; v++;
1278     }
1279   }
1280   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1281   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1282   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1288 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1289 {
1290   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1291   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1292   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1293   PetscErrorCode ierr;
1294   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1295   PetscInt       n,i,jrow,j;
1296 
1297   PetscFunctionBegin;
1298   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1299   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1300   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1301   idx  = a->j;
1302   v    = a->a;
1303   ii   = a->i;
1304 
1305   for (i=0; i<m; i++) {
1306     jrow = ii[i];
1307     n    = ii[i+1] - jrow;
1308     sum1  = 0.0;
1309     sum2  = 0.0;
1310     sum3  = 0.0;
1311     sum4  = 0.0;
1312     sum5  = 0.0;
1313     sum6  = 0.0;
1314     sum7  = 0.0;
1315     sum8  = 0.0;
1316     for (j=0; j<n; j++) {
1317       sum1 += v[jrow]*x[8*idx[jrow]];
1318       sum2 += v[jrow]*x[8*idx[jrow]+1];
1319       sum3 += v[jrow]*x[8*idx[jrow]+2];
1320       sum4 += v[jrow]*x[8*idx[jrow]+3];
1321       sum5 += v[jrow]*x[8*idx[jrow]+4];
1322       sum6 += v[jrow]*x[8*idx[jrow]+5];
1323       sum7 += v[jrow]*x[8*idx[jrow]+6];
1324       sum8 += v[jrow]*x[8*idx[jrow]+7];
1325       jrow++;
1326      }
1327     y[8*i]   += sum1;
1328     y[8*i+1] += sum2;
1329     y[8*i+2] += sum3;
1330     y[8*i+3] += sum4;
1331     y[8*i+4] += sum5;
1332     y[8*i+5] += sum6;
1333     y[8*i+6] += sum7;
1334     y[8*i+7] += sum8;
1335   }
1336 
1337   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1338   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1339   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1345 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1346 {
1347   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1348   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1349   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1350   PetscErrorCode ierr;
1351   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1352 
1353   PetscFunctionBegin;
1354   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1355   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1356   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1357   for (i=0; i<m; i++) {
1358     idx    = a->j + a->i[i] ;
1359     v      = a->a + a->i[i] ;
1360     n      = a->i[i+1] - a->i[i];
1361     alpha1 = x[8*i];
1362     alpha2 = x[8*i+1];
1363     alpha3 = x[8*i+2];
1364     alpha4 = x[8*i+3];
1365     alpha5 = x[8*i+4];
1366     alpha6 = x[8*i+5];
1367     alpha7 = x[8*i+6];
1368     alpha8 = x[8*i+7];
1369     while (n-->0) {
1370       y[8*(*idx)]   += alpha1*(*v);
1371       y[8*(*idx)+1] += alpha2*(*v);
1372       y[8*(*idx)+2] += alpha3*(*v);
1373       y[8*(*idx)+3] += alpha4*(*v);
1374       y[8*(*idx)+4] += alpha5*(*v);
1375       y[8*(*idx)+5] += alpha6*(*v);
1376       y[8*(*idx)+6] += alpha7*(*v);
1377       y[8*(*idx)+7] += alpha8*(*v);
1378       idx++; v++;
1379     }
1380   }
1381   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1382   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1383   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 /* ------------------------------------------------------------------------------*/
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1390 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1391 {
1392   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1393   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1394   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1395   PetscErrorCode ierr;
1396   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1397   PetscInt       n,i,jrow,j;
1398 
1399   PetscFunctionBegin;
1400   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1401   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1402   idx  = a->j;
1403   v    = a->a;
1404   ii   = a->i;
1405 
1406   for (i=0; i<m; i++) {
1407     jrow = ii[i];
1408     n    = ii[i+1] - jrow;
1409     sum1  = 0.0;
1410     sum2  = 0.0;
1411     sum3  = 0.0;
1412     sum4  = 0.0;
1413     sum5  = 0.0;
1414     sum6  = 0.0;
1415     sum7  = 0.0;
1416     sum8  = 0.0;
1417     sum9  = 0.0;
1418     nonzerorow += (n>0);
1419     for (j=0; j<n; j++) {
1420       sum1 += v[jrow]*x[9*idx[jrow]];
1421       sum2 += v[jrow]*x[9*idx[jrow]+1];
1422       sum3 += v[jrow]*x[9*idx[jrow]+2];
1423       sum4 += v[jrow]*x[9*idx[jrow]+3];
1424       sum5 += v[jrow]*x[9*idx[jrow]+4];
1425       sum6 += v[jrow]*x[9*idx[jrow]+5];
1426       sum7 += v[jrow]*x[9*idx[jrow]+6];
1427       sum8 += v[jrow]*x[9*idx[jrow]+7];
1428       sum9 += v[jrow]*x[9*idx[jrow]+8];
1429       jrow++;
1430      }
1431     y[9*i]   = sum1;
1432     y[9*i+1] = sum2;
1433     y[9*i+2] = sum3;
1434     y[9*i+3] = sum4;
1435     y[9*i+4] = sum5;
1436     y[9*i+5] = sum6;
1437     y[9*i+6] = sum7;
1438     y[9*i+7] = sum8;
1439     y[9*i+8] = sum9;
1440   }
1441 
1442   ierr = PetscLogFlops(18*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1443   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1444   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 /* ------------------------------------------------------------------------------*/
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1452 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1453 {
1454   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1455   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1456   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1457   PetscErrorCode ierr;
1458   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1459 
1460   PetscFunctionBegin;
1461   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1462   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1463   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1464 
1465   for (i=0; i<m; i++) {
1466     idx    = a->j + a->i[i] ;
1467     v      = a->a + a->i[i] ;
1468     n      = a->i[i+1] - a->i[i];
1469     alpha1 = x[9*i];
1470     alpha2 = x[9*i+1];
1471     alpha3 = x[9*i+2];
1472     alpha4 = x[9*i+3];
1473     alpha5 = x[9*i+4];
1474     alpha6 = x[9*i+5];
1475     alpha7 = x[9*i+6];
1476     alpha8 = x[9*i+7];
1477     alpha9 = x[9*i+8];
1478     while (n-->0) {
1479       y[9*(*idx)]   += alpha1*(*v);
1480       y[9*(*idx)+1] += alpha2*(*v);
1481       y[9*(*idx)+2] += alpha3*(*v);
1482       y[9*(*idx)+3] += alpha4*(*v);
1483       y[9*(*idx)+4] += alpha5*(*v);
1484       y[9*(*idx)+5] += alpha6*(*v);
1485       y[9*(*idx)+6] += alpha7*(*v);
1486       y[9*(*idx)+7] += alpha8*(*v);
1487       y[9*(*idx)+8] += alpha9*(*v);
1488       idx++; v++;
1489     }
1490   }
1491   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1492   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1493   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1499 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1500 {
1501   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1502   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1503   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1504   PetscErrorCode ierr;
1505   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1506   PetscInt       n,i,jrow,j;
1507 
1508   PetscFunctionBegin;
1509   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1510   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1511   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1512   idx  = a->j;
1513   v    = a->a;
1514   ii   = a->i;
1515 
1516   for (i=0; i<m; i++) {
1517     jrow = ii[i];
1518     n    = ii[i+1] - jrow;
1519     sum1  = 0.0;
1520     sum2  = 0.0;
1521     sum3  = 0.0;
1522     sum4  = 0.0;
1523     sum5  = 0.0;
1524     sum6  = 0.0;
1525     sum7  = 0.0;
1526     sum8  = 0.0;
1527     sum9  = 0.0;
1528     for (j=0; j<n; j++) {
1529       sum1 += v[jrow]*x[9*idx[jrow]];
1530       sum2 += v[jrow]*x[9*idx[jrow]+1];
1531       sum3 += v[jrow]*x[9*idx[jrow]+2];
1532       sum4 += v[jrow]*x[9*idx[jrow]+3];
1533       sum5 += v[jrow]*x[9*idx[jrow]+4];
1534       sum6 += v[jrow]*x[9*idx[jrow]+5];
1535       sum7 += v[jrow]*x[9*idx[jrow]+6];
1536       sum8 += v[jrow]*x[9*idx[jrow]+7];
1537       sum9 += v[jrow]*x[9*idx[jrow]+8];
1538       jrow++;
1539      }
1540     y[9*i]   += sum1;
1541     y[9*i+1] += sum2;
1542     y[9*i+2] += sum3;
1543     y[9*i+3] += sum4;
1544     y[9*i+4] += sum5;
1545     y[9*i+5] += sum6;
1546     y[9*i+6] += sum7;
1547     y[9*i+7] += sum8;
1548     y[9*i+8] += sum9;
1549   }
1550 
1551   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1552   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1553   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1559 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1560 {
1561   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1562   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1563   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1564   PetscErrorCode ierr;
1565   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1566 
1567   PetscFunctionBegin;
1568   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1569   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1570   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1571   for (i=0; i<m; i++) {
1572     idx    = a->j + a->i[i] ;
1573     v      = a->a + a->i[i] ;
1574     n      = a->i[i+1] - a->i[i];
1575     alpha1 = x[9*i];
1576     alpha2 = x[9*i+1];
1577     alpha3 = x[9*i+2];
1578     alpha4 = x[9*i+3];
1579     alpha5 = x[9*i+4];
1580     alpha6 = x[9*i+5];
1581     alpha7 = x[9*i+6];
1582     alpha8 = x[9*i+7];
1583     alpha9 = x[9*i+8];
1584     while (n-->0) {
1585       y[9*(*idx)]   += alpha1*(*v);
1586       y[9*(*idx)+1] += alpha2*(*v);
1587       y[9*(*idx)+2] += alpha3*(*v);
1588       y[9*(*idx)+3] += alpha4*(*v);
1589       y[9*(*idx)+4] += alpha5*(*v);
1590       y[9*(*idx)+5] += alpha6*(*v);
1591       y[9*(*idx)+6] += alpha7*(*v);
1592       y[9*(*idx)+7] += alpha8*(*v);
1593       y[9*(*idx)+8] += alpha9*(*v);
1594       idx++; v++;
1595     }
1596   }
1597   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1598   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1599   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1600   PetscFunctionReturn(0);
1601 }
1602 /*--------------------------------------------------------------------------------------------*/
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1605 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1606 {
1607   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1608   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1609   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1610   PetscErrorCode ierr;
1611   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1612   PetscInt       n,i,jrow,j;
1613 
1614   PetscFunctionBegin;
1615   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1616   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1617   idx  = a->j;
1618   v    = a->a;
1619   ii   = a->i;
1620 
1621   for (i=0; i<m; i++) {
1622     jrow = ii[i];
1623     n    = ii[i+1] - jrow;
1624     sum1  = 0.0;
1625     sum2  = 0.0;
1626     sum3  = 0.0;
1627     sum4  = 0.0;
1628     sum5  = 0.0;
1629     sum6  = 0.0;
1630     sum7  = 0.0;
1631     sum8  = 0.0;
1632     sum9  = 0.0;
1633     sum10 = 0.0;
1634     nonzerorow += (n>0);
1635     for (j=0; j<n; j++) {
1636       sum1  += v[jrow]*x[10*idx[jrow]];
1637       sum2  += v[jrow]*x[10*idx[jrow]+1];
1638       sum3  += v[jrow]*x[10*idx[jrow]+2];
1639       sum4  += v[jrow]*x[10*idx[jrow]+3];
1640       sum5  += v[jrow]*x[10*idx[jrow]+4];
1641       sum6  += v[jrow]*x[10*idx[jrow]+5];
1642       sum7  += v[jrow]*x[10*idx[jrow]+6];
1643       sum8  += v[jrow]*x[10*idx[jrow]+7];
1644       sum9  += v[jrow]*x[10*idx[jrow]+8];
1645       sum10 += v[jrow]*x[10*idx[jrow]+9];
1646       jrow++;
1647      }
1648     y[10*i]   = sum1;
1649     y[10*i+1] = sum2;
1650     y[10*i+2] = sum3;
1651     y[10*i+3] = sum4;
1652     y[10*i+4] = sum5;
1653     y[10*i+5] = sum6;
1654     y[10*i+6] = sum7;
1655     y[10*i+7] = sum8;
1656     y[10*i+8] = sum9;
1657     y[10*i+9] = sum10;
1658   }
1659 
1660   ierr = PetscLogFlops(20*a->nz - 10*nonzerorow);CHKERRQ(ierr);
1661   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1662   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1668 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1669 {
1670   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1671   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1672   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1673   PetscErrorCode ierr;
1674   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1675   PetscInt       n,i,jrow,j;
1676 
1677   PetscFunctionBegin;
1678   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1679   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1680   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1681   idx  = a->j;
1682   v    = a->a;
1683   ii   = a->i;
1684 
1685   for (i=0; i<m; i++) {
1686     jrow = ii[i];
1687     n    = ii[i+1] - jrow;
1688     sum1  = 0.0;
1689     sum2  = 0.0;
1690     sum3  = 0.0;
1691     sum4  = 0.0;
1692     sum5  = 0.0;
1693     sum6  = 0.0;
1694     sum7  = 0.0;
1695     sum8  = 0.0;
1696     sum9  = 0.0;
1697     sum10 = 0.0;
1698     for (j=0; j<n; j++) {
1699       sum1  += v[jrow]*x[10*idx[jrow]];
1700       sum2  += v[jrow]*x[10*idx[jrow]+1];
1701       sum3  += v[jrow]*x[10*idx[jrow]+2];
1702       sum4  += v[jrow]*x[10*idx[jrow]+3];
1703       sum5  += v[jrow]*x[10*idx[jrow]+4];
1704       sum6  += v[jrow]*x[10*idx[jrow]+5];
1705       sum7  += v[jrow]*x[10*idx[jrow]+6];
1706       sum8  += v[jrow]*x[10*idx[jrow]+7];
1707       sum9  += v[jrow]*x[10*idx[jrow]+8];
1708       sum10 += v[jrow]*x[10*idx[jrow]+9];
1709       jrow++;
1710      }
1711     y[10*i]   += sum1;
1712     y[10*i+1] += sum2;
1713     y[10*i+2] += sum3;
1714     y[10*i+3] += sum4;
1715     y[10*i+4] += sum5;
1716     y[10*i+5] += sum6;
1717     y[10*i+6] += sum7;
1718     y[10*i+7] += sum8;
1719     y[10*i+8] += sum9;
1720     y[10*i+9] += sum10;
1721   }
1722 
1723   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1724   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1725   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1731 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1732 {
1733   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1734   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1735   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1736   PetscErrorCode ierr;
1737   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1738 
1739   PetscFunctionBegin;
1740   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1741   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1742   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1743 
1744   for (i=0; i<m; i++) {
1745     idx    = a->j + a->i[i] ;
1746     v      = a->a + a->i[i] ;
1747     n      = a->i[i+1] - a->i[i];
1748     alpha1 = x[10*i];
1749     alpha2 = x[10*i+1];
1750     alpha3 = x[10*i+2];
1751     alpha4 = x[10*i+3];
1752     alpha5 = x[10*i+4];
1753     alpha6 = x[10*i+5];
1754     alpha7 = x[10*i+6];
1755     alpha8 = x[10*i+7];
1756     alpha9 = x[10*i+8];
1757     alpha10 = x[10*i+9];
1758     while (n-->0) {
1759       y[10*(*idx)]   += alpha1*(*v);
1760       y[10*(*idx)+1] += alpha2*(*v);
1761       y[10*(*idx)+2] += alpha3*(*v);
1762       y[10*(*idx)+3] += alpha4*(*v);
1763       y[10*(*idx)+4] += alpha5*(*v);
1764       y[10*(*idx)+5] += alpha6*(*v);
1765       y[10*(*idx)+6] += alpha7*(*v);
1766       y[10*(*idx)+7] += alpha8*(*v);
1767       y[10*(*idx)+8] += alpha9*(*v);
1768       y[10*(*idx)+9] += alpha10*(*v);
1769       idx++; v++;
1770     }
1771   }
1772   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1773   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1774   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1780 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1781 {
1782   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1783   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1784   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1785   PetscErrorCode ierr;
1786   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1787 
1788   PetscFunctionBegin;
1789   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1790   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1791   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1792   for (i=0; i<m; i++) {
1793     idx    = a->j + a->i[i] ;
1794     v      = a->a + a->i[i] ;
1795     n      = a->i[i+1] - a->i[i];
1796     alpha1 = x[10*i];
1797     alpha2 = x[10*i+1];
1798     alpha3 = x[10*i+2];
1799     alpha4 = x[10*i+3];
1800     alpha5 = x[10*i+4];
1801     alpha6 = x[10*i+5];
1802     alpha7 = x[10*i+6];
1803     alpha8 = x[10*i+7];
1804     alpha9 = x[10*i+8];
1805     alpha10 = x[10*i+9];
1806     while (n-->0) {
1807       y[10*(*idx)]   += alpha1*(*v);
1808       y[10*(*idx)+1] += alpha2*(*v);
1809       y[10*(*idx)+2] += alpha3*(*v);
1810       y[10*(*idx)+3] += alpha4*(*v);
1811       y[10*(*idx)+4] += alpha5*(*v);
1812       y[10*(*idx)+5] += alpha6*(*v);
1813       y[10*(*idx)+6] += alpha7*(*v);
1814       y[10*(*idx)+7] += alpha8*(*v);
1815       y[10*(*idx)+8] += alpha9*(*v);
1816       y[10*(*idx)+9] += alpha10*(*v);
1817       idx++; v++;
1818     }
1819   }
1820   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1821   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1822   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 
1827 /*--------------------------------------------------------------------------------------------*/
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1830 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1831 {
1832   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1833   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1834   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1835   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1836   PetscErrorCode ierr;
1837   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1838   PetscInt       n,i,jrow,j;
1839 
1840   PetscFunctionBegin;
1841   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1842   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1843   idx  = a->j;
1844   v    = a->a;
1845   ii   = a->i;
1846 
1847   for (i=0; i<m; i++) {
1848     jrow = ii[i];
1849     n    = ii[i+1] - jrow;
1850     sum1  = 0.0;
1851     sum2  = 0.0;
1852     sum3  = 0.0;
1853     sum4  = 0.0;
1854     sum5  = 0.0;
1855     sum6  = 0.0;
1856     sum7  = 0.0;
1857     sum8  = 0.0;
1858     sum9  = 0.0;
1859     sum10 = 0.0;
1860     sum11 = 0.0;
1861     sum12 = 0.0;
1862     sum13 = 0.0;
1863     sum14 = 0.0;
1864     sum15 = 0.0;
1865     sum16 = 0.0;
1866     nonzerorow += (n>0);
1867     for (j=0; j<n; j++) {
1868       sum1  += v[jrow]*x[16*idx[jrow]];
1869       sum2  += v[jrow]*x[16*idx[jrow]+1];
1870       sum3  += v[jrow]*x[16*idx[jrow]+2];
1871       sum4  += v[jrow]*x[16*idx[jrow]+3];
1872       sum5  += v[jrow]*x[16*idx[jrow]+4];
1873       sum6  += v[jrow]*x[16*idx[jrow]+5];
1874       sum7  += v[jrow]*x[16*idx[jrow]+6];
1875       sum8  += v[jrow]*x[16*idx[jrow]+7];
1876       sum9  += v[jrow]*x[16*idx[jrow]+8];
1877       sum10 += v[jrow]*x[16*idx[jrow]+9];
1878       sum11 += v[jrow]*x[16*idx[jrow]+10];
1879       sum12 += v[jrow]*x[16*idx[jrow]+11];
1880       sum13 += v[jrow]*x[16*idx[jrow]+12];
1881       sum14 += v[jrow]*x[16*idx[jrow]+13];
1882       sum15 += v[jrow]*x[16*idx[jrow]+14];
1883       sum16 += v[jrow]*x[16*idx[jrow]+15];
1884       jrow++;
1885      }
1886     y[16*i]    = sum1;
1887     y[16*i+1]  = sum2;
1888     y[16*i+2]  = sum3;
1889     y[16*i+3]  = sum4;
1890     y[16*i+4]  = sum5;
1891     y[16*i+5]  = sum6;
1892     y[16*i+6]  = sum7;
1893     y[16*i+7]  = sum8;
1894     y[16*i+8]  = sum9;
1895     y[16*i+9]  = sum10;
1896     y[16*i+10] = sum11;
1897     y[16*i+11] = sum12;
1898     y[16*i+12] = sum13;
1899     y[16*i+13] = sum14;
1900     y[16*i+14] = sum15;
1901     y[16*i+15] = sum16;
1902   }
1903 
1904   ierr = PetscLogFlops(32*a->nz - 16*nonzerorow);CHKERRQ(ierr);
1905   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1906   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1912 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1913 {
1914   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1915   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1916   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1917   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1918   PetscErrorCode ierr;
1919   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1920 
1921   PetscFunctionBegin;
1922   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1923   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1924   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1925 
1926   for (i=0; i<m; i++) {
1927     idx    = a->j + a->i[i] ;
1928     v      = a->a + a->i[i] ;
1929     n      = a->i[i+1] - a->i[i];
1930     alpha1  = x[16*i];
1931     alpha2  = x[16*i+1];
1932     alpha3  = x[16*i+2];
1933     alpha4  = x[16*i+3];
1934     alpha5  = x[16*i+4];
1935     alpha6  = x[16*i+5];
1936     alpha7  = x[16*i+6];
1937     alpha8  = x[16*i+7];
1938     alpha9  = x[16*i+8];
1939     alpha10 = x[16*i+9];
1940     alpha11 = x[16*i+10];
1941     alpha12 = x[16*i+11];
1942     alpha13 = x[16*i+12];
1943     alpha14 = x[16*i+13];
1944     alpha15 = x[16*i+14];
1945     alpha16 = x[16*i+15];
1946     while (n-->0) {
1947       y[16*(*idx)]    += alpha1*(*v);
1948       y[16*(*idx)+1]  += alpha2*(*v);
1949       y[16*(*idx)+2]  += alpha3*(*v);
1950       y[16*(*idx)+3]  += alpha4*(*v);
1951       y[16*(*idx)+4]  += alpha5*(*v);
1952       y[16*(*idx)+5]  += alpha6*(*v);
1953       y[16*(*idx)+6]  += alpha7*(*v);
1954       y[16*(*idx)+7]  += alpha8*(*v);
1955       y[16*(*idx)+8]  += alpha9*(*v);
1956       y[16*(*idx)+9]  += alpha10*(*v);
1957       y[16*(*idx)+10] += alpha11*(*v);
1958       y[16*(*idx)+11] += alpha12*(*v);
1959       y[16*(*idx)+12] += alpha13*(*v);
1960       y[16*(*idx)+13] += alpha14*(*v);
1961       y[16*(*idx)+14] += alpha15*(*v);
1962       y[16*(*idx)+15] += alpha16*(*v);
1963       idx++; v++;
1964     }
1965   }
1966   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
1967   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1968   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1974 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1975 {
1976   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1977   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1978   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1979   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1980   PetscErrorCode ierr;
1981   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1982   PetscInt       n,i,jrow,j;
1983 
1984   PetscFunctionBegin;
1985   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1986   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1987   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1988   idx  = a->j;
1989   v    = a->a;
1990   ii   = a->i;
1991 
1992   for (i=0; i<m; i++) {
1993     jrow = ii[i];
1994     n    = ii[i+1] - jrow;
1995     sum1  = 0.0;
1996     sum2  = 0.0;
1997     sum3  = 0.0;
1998     sum4  = 0.0;
1999     sum5  = 0.0;
2000     sum6  = 0.0;
2001     sum7  = 0.0;
2002     sum8  = 0.0;
2003     sum9  = 0.0;
2004     sum10 = 0.0;
2005     sum11 = 0.0;
2006     sum12 = 0.0;
2007     sum13 = 0.0;
2008     sum14 = 0.0;
2009     sum15 = 0.0;
2010     sum16 = 0.0;
2011     for (j=0; j<n; j++) {
2012       sum1  += v[jrow]*x[16*idx[jrow]];
2013       sum2  += v[jrow]*x[16*idx[jrow]+1];
2014       sum3  += v[jrow]*x[16*idx[jrow]+2];
2015       sum4  += v[jrow]*x[16*idx[jrow]+3];
2016       sum5  += v[jrow]*x[16*idx[jrow]+4];
2017       sum6  += v[jrow]*x[16*idx[jrow]+5];
2018       sum7  += v[jrow]*x[16*idx[jrow]+6];
2019       sum8  += v[jrow]*x[16*idx[jrow]+7];
2020       sum9  += v[jrow]*x[16*idx[jrow]+8];
2021       sum10 += v[jrow]*x[16*idx[jrow]+9];
2022       sum11 += v[jrow]*x[16*idx[jrow]+10];
2023       sum12 += v[jrow]*x[16*idx[jrow]+11];
2024       sum13 += v[jrow]*x[16*idx[jrow]+12];
2025       sum14 += v[jrow]*x[16*idx[jrow]+13];
2026       sum15 += v[jrow]*x[16*idx[jrow]+14];
2027       sum16 += v[jrow]*x[16*idx[jrow]+15];
2028       jrow++;
2029      }
2030     y[16*i]    += sum1;
2031     y[16*i+1]  += sum2;
2032     y[16*i+2]  += sum3;
2033     y[16*i+3]  += sum4;
2034     y[16*i+4]  += sum5;
2035     y[16*i+5]  += sum6;
2036     y[16*i+6]  += sum7;
2037     y[16*i+7]  += sum8;
2038     y[16*i+8]  += sum9;
2039     y[16*i+9]  += sum10;
2040     y[16*i+10] += sum11;
2041     y[16*i+11] += sum12;
2042     y[16*i+12] += sum13;
2043     y[16*i+13] += sum14;
2044     y[16*i+14] += sum15;
2045     y[16*i+15] += sum16;
2046   }
2047 
2048   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2049   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2050   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2056 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2057 {
2058   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2059   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2060   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2061   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2062   PetscErrorCode ierr;
2063   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2064 
2065   PetscFunctionBegin;
2066   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2067   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2068   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2069   for (i=0; i<m; i++) {
2070     idx    = a->j + a->i[i] ;
2071     v      = a->a + a->i[i] ;
2072     n      = a->i[i+1] - a->i[i];
2073     alpha1 = x[16*i];
2074     alpha2 = x[16*i+1];
2075     alpha3 = x[16*i+2];
2076     alpha4 = x[16*i+3];
2077     alpha5 = x[16*i+4];
2078     alpha6 = x[16*i+5];
2079     alpha7 = x[16*i+6];
2080     alpha8 = x[16*i+7];
2081     alpha9  = x[16*i+8];
2082     alpha10 = x[16*i+9];
2083     alpha11 = x[16*i+10];
2084     alpha12 = x[16*i+11];
2085     alpha13 = x[16*i+12];
2086     alpha14 = x[16*i+13];
2087     alpha15 = x[16*i+14];
2088     alpha16 = x[16*i+15];
2089     while (n-->0) {
2090       y[16*(*idx)]   += alpha1*(*v);
2091       y[16*(*idx)+1] += alpha2*(*v);
2092       y[16*(*idx)+2] += alpha3*(*v);
2093       y[16*(*idx)+3] += alpha4*(*v);
2094       y[16*(*idx)+4] += alpha5*(*v);
2095       y[16*(*idx)+5] += alpha6*(*v);
2096       y[16*(*idx)+6] += alpha7*(*v);
2097       y[16*(*idx)+7] += alpha8*(*v);
2098       y[16*(*idx)+8]  += alpha9*(*v);
2099       y[16*(*idx)+9]  += alpha10*(*v);
2100       y[16*(*idx)+10] += alpha11*(*v);
2101       y[16*(*idx)+11] += alpha12*(*v);
2102       y[16*(*idx)+12] += alpha13*(*v);
2103       y[16*(*idx)+13] += alpha14*(*v);
2104       y[16*(*idx)+14] += alpha15*(*v);
2105       y[16*(*idx)+15] += alpha16*(*v);
2106       idx++; v++;
2107     }
2108   }
2109   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2110   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2111   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 /*--------------------------------------------------------------------------------------------*/
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2118 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2119 {
2120   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2121   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2122   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2123   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2124   PetscErrorCode ierr;
2125   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
2126   PetscInt       n,i,jrow,j;
2127 
2128   PetscFunctionBegin;
2129   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2130   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2131   idx  = a->j;
2132   v    = a->a;
2133   ii   = a->i;
2134 
2135   for (i=0; i<m; i++) {
2136     jrow = ii[i];
2137     n    = ii[i+1] - jrow;
2138     sum1  = 0.0;
2139     sum2  = 0.0;
2140     sum3  = 0.0;
2141     sum4  = 0.0;
2142     sum5  = 0.0;
2143     sum6  = 0.0;
2144     sum7  = 0.0;
2145     sum8  = 0.0;
2146     sum9  = 0.0;
2147     sum10 = 0.0;
2148     sum11 = 0.0;
2149     sum12 = 0.0;
2150     sum13 = 0.0;
2151     sum14 = 0.0;
2152     sum15 = 0.0;
2153     sum16 = 0.0;
2154     sum17 = 0.0;
2155     sum18 = 0.0;
2156     nonzerorow += (n>0);
2157     for (j=0; j<n; j++) {
2158       sum1  += v[jrow]*x[18*idx[jrow]];
2159       sum2  += v[jrow]*x[18*idx[jrow]+1];
2160       sum3  += v[jrow]*x[18*idx[jrow]+2];
2161       sum4  += v[jrow]*x[18*idx[jrow]+3];
2162       sum5  += v[jrow]*x[18*idx[jrow]+4];
2163       sum6  += v[jrow]*x[18*idx[jrow]+5];
2164       sum7  += v[jrow]*x[18*idx[jrow]+6];
2165       sum8  += v[jrow]*x[18*idx[jrow]+7];
2166       sum9  += v[jrow]*x[18*idx[jrow]+8];
2167       sum10 += v[jrow]*x[18*idx[jrow]+9];
2168       sum11 += v[jrow]*x[18*idx[jrow]+10];
2169       sum12 += v[jrow]*x[18*idx[jrow]+11];
2170       sum13 += v[jrow]*x[18*idx[jrow]+12];
2171       sum14 += v[jrow]*x[18*idx[jrow]+13];
2172       sum15 += v[jrow]*x[18*idx[jrow]+14];
2173       sum16 += v[jrow]*x[18*idx[jrow]+15];
2174       sum17 += v[jrow]*x[18*idx[jrow]+16];
2175       sum18 += v[jrow]*x[18*idx[jrow]+17];
2176       jrow++;
2177      }
2178     y[18*i]    = sum1;
2179     y[18*i+1]  = sum2;
2180     y[18*i+2]  = sum3;
2181     y[18*i+3]  = sum4;
2182     y[18*i+4]  = sum5;
2183     y[18*i+5]  = sum6;
2184     y[18*i+6]  = sum7;
2185     y[18*i+7]  = sum8;
2186     y[18*i+8]  = sum9;
2187     y[18*i+9]  = sum10;
2188     y[18*i+10] = sum11;
2189     y[18*i+11] = sum12;
2190     y[18*i+12] = sum13;
2191     y[18*i+13] = sum14;
2192     y[18*i+14] = sum15;
2193     y[18*i+15] = sum16;
2194     y[18*i+16] = sum17;
2195     y[18*i+17] = sum18;
2196   }
2197 
2198   ierr = PetscLogFlops(36*a->nz - 18*nonzerorow);CHKERRQ(ierr);
2199   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2200   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2206 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2207 {
2208   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2209   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2210   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2211   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2212   PetscErrorCode ierr;
2213   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2214 
2215   PetscFunctionBegin;
2216   ierr = VecSet(yy,zero);CHKERRQ(ierr);
2217   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2218   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2219 
2220   for (i=0; i<m; i++) {
2221     idx    = a->j + a->i[i] ;
2222     v      = a->a + a->i[i] ;
2223     n      = a->i[i+1] - a->i[i];
2224     alpha1  = x[18*i];
2225     alpha2  = x[18*i+1];
2226     alpha3  = x[18*i+2];
2227     alpha4  = x[18*i+3];
2228     alpha5  = x[18*i+4];
2229     alpha6  = x[18*i+5];
2230     alpha7  = x[18*i+6];
2231     alpha8  = x[18*i+7];
2232     alpha9  = x[18*i+8];
2233     alpha10 = x[18*i+9];
2234     alpha11 = x[18*i+10];
2235     alpha12 = x[18*i+11];
2236     alpha13 = x[18*i+12];
2237     alpha14 = x[18*i+13];
2238     alpha15 = x[18*i+14];
2239     alpha16 = x[18*i+15];
2240     alpha17 = x[18*i+16];
2241     alpha18 = x[18*i+17];
2242     while (n-->0) {
2243       y[18*(*idx)]    += alpha1*(*v);
2244       y[18*(*idx)+1]  += alpha2*(*v);
2245       y[18*(*idx)+2]  += alpha3*(*v);
2246       y[18*(*idx)+3]  += alpha4*(*v);
2247       y[18*(*idx)+4]  += alpha5*(*v);
2248       y[18*(*idx)+5]  += alpha6*(*v);
2249       y[18*(*idx)+6]  += alpha7*(*v);
2250       y[18*(*idx)+7]  += alpha8*(*v);
2251       y[18*(*idx)+8]  += alpha9*(*v);
2252       y[18*(*idx)+9]  += alpha10*(*v);
2253       y[18*(*idx)+10] += alpha11*(*v);
2254       y[18*(*idx)+11] += alpha12*(*v);
2255       y[18*(*idx)+12] += alpha13*(*v);
2256       y[18*(*idx)+13] += alpha14*(*v);
2257       y[18*(*idx)+14] += alpha15*(*v);
2258       y[18*(*idx)+15] += alpha16*(*v);
2259       y[18*(*idx)+16] += alpha17*(*v);
2260       y[18*(*idx)+17] += alpha18*(*v);
2261       idx++; v++;
2262     }
2263   }
2264   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
2265   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2266   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2272 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2273 {
2274   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2275   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2276   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2277   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2278   PetscErrorCode ierr;
2279   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
2280   PetscInt       n,i,jrow,j;
2281 
2282   PetscFunctionBegin;
2283   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2284   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2285   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2286   idx  = a->j;
2287   v    = a->a;
2288   ii   = a->i;
2289 
2290   for (i=0; i<m; i++) {
2291     jrow = ii[i];
2292     n    = ii[i+1] - jrow;
2293     sum1  = 0.0;
2294     sum2  = 0.0;
2295     sum3  = 0.0;
2296     sum4  = 0.0;
2297     sum5  = 0.0;
2298     sum6  = 0.0;
2299     sum7  = 0.0;
2300     sum8  = 0.0;
2301     sum9  = 0.0;
2302     sum10 = 0.0;
2303     sum11 = 0.0;
2304     sum12 = 0.0;
2305     sum13 = 0.0;
2306     sum14 = 0.0;
2307     sum15 = 0.0;
2308     sum16 = 0.0;
2309     sum17 = 0.0;
2310     sum18 = 0.0;
2311     for (j=0; j<n; j++) {
2312       sum1  += v[jrow]*x[18*idx[jrow]];
2313       sum2  += v[jrow]*x[18*idx[jrow]+1];
2314       sum3  += v[jrow]*x[18*idx[jrow]+2];
2315       sum4  += v[jrow]*x[18*idx[jrow]+3];
2316       sum5  += v[jrow]*x[18*idx[jrow]+4];
2317       sum6  += v[jrow]*x[18*idx[jrow]+5];
2318       sum7  += v[jrow]*x[18*idx[jrow]+6];
2319       sum8  += v[jrow]*x[18*idx[jrow]+7];
2320       sum9  += v[jrow]*x[18*idx[jrow]+8];
2321       sum10 += v[jrow]*x[18*idx[jrow]+9];
2322       sum11 += v[jrow]*x[18*idx[jrow]+10];
2323       sum12 += v[jrow]*x[18*idx[jrow]+11];
2324       sum13 += v[jrow]*x[18*idx[jrow]+12];
2325       sum14 += v[jrow]*x[18*idx[jrow]+13];
2326       sum15 += v[jrow]*x[18*idx[jrow]+14];
2327       sum16 += v[jrow]*x[18*idx[jrow]+15];
2328       sum17 += v[jrow]*x[18*idx[jrow]+16];
2329       sum18 += v[jrow]*x[18*idx[jrow]+17];
2330       jrow++;
2331      }
2332     y[18*i]    += sum1;
2333     y[18*i+1]  += sum2;
2334     y[18*i+2]  += sum3;
2335     y[18*i+3]  += sum4;
2336     y[18*i+4]  += sum5;
2337     y[18*i+5]  += sum6;
2338     y[18*i+6]  += sum7;
2339     y[18*i+7]  += sum8;
2340     y[18*i+8]  += sum9;
2341     y[18*i+9]  += sum10;
2342     y[18*i+10] += sum11;
2343     y[18*i+11] += sum12;
2344     y[18*i+12] += sum13;
2345     y[18*i+13] += sum14;
2346     y[18*i+14] += sum15;
2347     y[18*i+15] += sum16;
2348     y[18*i+16] += sum17;
2349     y[18*i+17] += sum18;
2350   }
2351 
2352   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
2353   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2354   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2360 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2361 {
2362   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2363   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2364   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2365   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2366   PetscErrorCode ierr;
2367   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2368 
2369   PetscFunctionBegin;
2370   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2371   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2372   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2373   for (i=0; i<m; i++) {
2374     idx    = a->j + a->i[i] ;
2375     v      = a->a + a->i[i] ;
2376     n      = a->i[i+1] - a->i[i];
2377     alpha1 = x[18*i];
2378     alpha2 = x[18*i+1];
2379     alpha3 = x[18*i+2];
2380     alpha4 = x[18*i+3];
2381     alpha5 = x[18*i+4];
2382     alpha6 = x[18*i+5];
2383     alpha7 = x[18*i+6];
2384     alpha8 = x[18*i+7];
2385     alpha9  = x[18*i+8];
2386     alpha10 = x[18*i+9];
2387     alpha11 = x[18*i+10];
2388     alpha12 = x[18*i+11];
2389     alpha13 = x[18*i+12];
2390     alpha14 = x[18*i+13];
2391     alpha15 = x[18*i+14];
2392     alpha16 = x[18*i+15];
2393     alpha17 = x[18*i+16];
2394     alpha18 = x[18*i+17];
2395     while (n-->0) {
2396       y[18*(*idx)]   += alpha1*(*v);
2397       y[18*(*idx)+1] += alpha2*(*v);
2398       y[18*(*idx)+2] += alpha3*(*v);
2399       y[18*(*idx)+3] += alpha4*(*v);
2400       y[18*(*idx)+4] += alpha5*(*v);
2401       y[18*(*idx)+5] += alpha6*(*v);
2402       y[18*(*idx)+6] += alpha7*(*v);
2403       y[18*(*idx)+7] += alpha8*(*v);
2404       y[18*(*idx)+8]  += alpha9*(*v);
2405       y[18*(*idx)+9]  += alpha10*(*v);
2406       y[18*(*idx)+10] += alpha11*(*v);
2407       y[18*(*idx)+11] += alpha12*(*v);
2408       y[18*(*idx)+12] += alpha13*(*v);
2409       y[18*(*idx)+13] += alpha14*(*v);
2410       y[18*(*idx)+14] += alpha15*(*v);
2411       y[18*(*idx)+15] += alpha16*(*v);
2412       y[18*(*idx)+16] += alpha17*(*v);
2413       y[18*(*idx)+17] += alpha18*(*v);
2414       idx++; v++;
2415     }
2416   }
2417   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2419   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 /*===================================================================================*/
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2426 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2427 {
2428   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2429   PetscErrorCode ierr;
2430 
2431   PetscFunctionBegin;
2432   /* start the scatter */
2433   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2434   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2435   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2436   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2442 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2443 {
2444   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2445   PetscErrorCode ierr;
2446 
2447   PetscFunctionBegin;
2448   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2449   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2450   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2451   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2457 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2458 {
2459   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   /* start the scatter */
2464   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2465   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2466   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2467   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2473 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2474 {
2475   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2480   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2481   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2482   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 /* ----------------------------------------------------------------*/
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2489 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2490 {
2491   /* This routine requires testing -- but it's getting better. */
2492   PetscErrorCode     ierr;
2493   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2494   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2495   Mat                P=pp->AIJ;
2496   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2497   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2498   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2499   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2500   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2501   MatScalar          *ca;
2502 
2503   PetscFunctionBegin;
2504   /* Start timer */
2505   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2506 
2507   /* Get ij structure of P^T */
2508   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2509 
2510   cn = pn*ppdof;
2511   /* Allocate ci array, arrays for fill computation and */
2512   /* free space for accumulating nonzero column info */
2513   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2514   ci[0] = 0;
2515 
2516   /* Work arrays for rows of P^T*A */
2517   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
2518   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
2519   ptasparserow = ptadenserow  + an;
2520   denserow     = ptasparserow + an;
2521   sparserow    = denserow     + cn;
2522 
2523   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2524   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2525   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2526   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2527   current_space = free_space;
2528 
2529   /* Determine symbolic info for each row of C: */
2530   for (i=0;i<pn;i++) {
2531     ptnzi  = pti[i+1] - pti[i];
2532     ptJ    = ptj + pti[i];
2533     for (dof=0;dof<ppdof;dof++) {
2534       ptanzi = 0;
2535       /* Determine symbolic row of PtA: */
2536       for (j=0;j<ptnzi;j++) {
2537         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2538         arow = ptJ[j]*ppdof + dof;
2539         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2540         anzj = ai[arow+1] - ai[arow];
2541         ajj  = aj + ai[arow];
2542         for (k=0;k<anzj;k++) {
2543           if (!ptadenserow[ajj[k]]) {
2544             ptadenserow[ajj[k]]    = -1;
2545             ptasparserow[ptanzi++] = ajj[k];
2546           }
2547         }
2548       }
2549       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2550       ptaj = ptasparserow;
2551       cnzi   = 0;
2552       for (j=0;j<ptanzi;j++) {
2553         /* Get offset within block of P */
2554         pshift = *ptaj%ppdof;
2555         /* Get block row of P */
2556         prow = (*ptaj++)/ppdof; /* integer division */
2557         /* P has same number of nonzeros per row as the compressed form */
2558         pnzj = pi[prow+1] - pi[prow];
2559         pjj  = pj + pi[prow];
2560         for (k=0;k<pnzj;k++) {
2561           /* Locations in C are shifted by the offset within the block */
2562           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2563           if (!denserow[pjj[k]*ppdof+pshift]) {
2564             denserow[pjj[k]*ppdof+pshift] = -1;
2565             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2566           }
2567         }
2568       }
2569 
2570       /* sort sparserow */
2571       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2572 
2573       /* If free space is not available, make more free space */
2574       /* Double the amount of total space in the list */
2575       if (current_space->local_remaining<cnzi) {
2576         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
2577       }
2578 
2579       /* Copy data into free space, and zero out denserows */
2580       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2581       current_space->array           += cnzi;
2582       current_space->local_used      += cnzi;
2583       current_space->local_remaining -= cnzi;
2584 
2585       for (j=0;j<ptanzi;j++) {
2586         ptadenserow[ptasparserow[j]] = 0;
2587       }
2588       for (j=0;j<cnzi;j++) {
2589         denserow[sparserow[j]] = 0;
2590       }
2591       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2592       /*        For now, we will recompute what is needed. */
2593       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2594     }
2595   }
2596   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2597   /* Allocate space for cj, initialize cj, and */
2598   /* destroy list of free space and other temporary array(s) */
2599   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2600   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2601   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
2602 
2603   /* Allocate space for ca */
2604   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
2605   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
2606 
2607   /* put together the new matrix */
2608   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2609 
2610   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2611   /* Since these are PETSc arrays, change flags to free them as necessary. */
2612   c          = (Mat_SeqAIJ *)((*C)->data);
2613   c->free_a  = PETSC_TRUE;
2614   c->free_ij = PETSC_TRUE;
2615   c->nonew   = 0;
2616 
2617   /* Clean up. */
2618   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2619 
2620   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 #undef __FUNCT__
2625 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
2626 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2627 {
2628   /* This routine requires testing -- first draft only */
2629   PetscErrorCode ierr;
2630   PetscInt       flops=0;
2631   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2632   Mat            P=pp->AIJ;
2633   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2634   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2635   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2636   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2637   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2638   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2639   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2640   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2641 
2642   PetscFunctionBegin;
2643   /* Allocate temporary array for storage of one row of A*P */
2644   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
2645   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
2646 
2647   apj      = (PetscInt *)(apa + cn);
2648   apjdense = apj + cn;
2649 
2650   /* Clear old values in C */
2651   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
2652 
2653   for (i=0;i<am;i++) {
2654     /* Form sparse row of A*P */
2655     anzi  = ai[i+1] - ai[i];
2656     apnzj = 0;
2657     for (j=0;j<anzi;j++) {
2658       /* Get offset within block of P */
2659       pshift = *aj%ppdof;
2660       /* Get block row of P */
2661       prow   = *aj++/ppdof; /* integer division */
2662       pnzj = pi[prow+1] - pi[prow];
2663       pjj  = pj + pi[prow];
2664       paj  = pa + pi[prow];
2665       for (k=0;k<pnzj;k++) {
2666         poffset = pjj[k]*ppdof+pshift;
2667         if (!apjdense[poffset]) {
2668           apjdense[poffset] = -1;
2669           apj[apnzj++]      = poffset;
2670         }
2671         apa[poffset] += (*aa)*paj[k];
2672       }
2673       flops += 2*pnzj;
2674       aa++;
2675     }
2676 
2677     /* Sort the j index array for quick sparse axpy. */
2678     /* Note: a array does not need sorting as it is in dense storage locations. */
2679     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
2680 
2681     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2682     prow    = i/ppdof; /* integer division */
2683     pshift  = i%ppdof;
2684     poffset = pi[prow];
2685     pnzi = pi[prow+1] - poffset;
2686     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2687     pJ   = pj+poffset;
2688     pA   = pa+poffset;
2689     for (j=0;j<pnzi;j++) {
2690       crow   = (*pJ)*ppdof+pshift;
2691       cjj    = cj + ci[crow];
2692       caj    = ca + ci[crow];
2693       pJ++;
2694       /* Perform sparse axpy operation.  Note cjj includes apj. */
2695       for (k=0,nextap=0;nextap<apnzj;k++) {
2696         if (cjj[k]==apj[nextap]) {
2697           caj[k] += (*pA)*apa[apj[nextap++]];
2698         }
2699       }
2700       flops += 2*apnzj;
2701       pA++;
2702     }
2703 
2704     /* Zero the current row info for A*P */
2705     for (j=0;j<apnzj;j++) {
2706       apa[apj[j]]      = 0.;
2707       apjdense[apj[j]] = 0;
2708     }
2709   }
2710 
2711   /* Assemble the final matrix and clean up */
2712   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2713   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2714   ierr = PetscFree(apa);CHKERRQ(ierr);
2715   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 #undef __FUNCT__
2720 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
2721 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2722 {
2723   PetscErrorCode    ierr;
2724 
2725   PetscFunctionBegin;
2726   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2727   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
2728   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
2734 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2735 {
2736   PetscFunctionBegin;
2737   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 EXTERN_C_BEGIN
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2744 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2745 {
2746   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2747   Mat               a = b->AIJ,B;
2748   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2749   PetscErrorCode    ierr;
2750   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2751   PetscInt          *cols;
2752   PetscScalar       *vals;
2753 
2754   PetscFunctionBegin;
2755   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
2756   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
2757   for (i=0; i<m; i++) {
2758     nmax = PetscMax(nmax,aij->ilen[i]);
2759     for (j=0; j<dof; j++) {
2760       ilen[dof*i+j] = aij->ilen[i];
2761     }
2762   }
2763   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2764   ierr = PetscFree(ilen);CHKERRQ(ierr);
2765   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
2766   ii   = 0;
2767   for (i=0; i<m; i++) {
2768     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2769     for (j=0; j<dof; j++) {
2770       for (k=0; k<ncols; k++) {
2771         icols[k] = dof*cols[k]+j;
2772       }
2773       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2774       ii++;
2775     }
2776     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2777   }
2778   ierr = PetscFree(icols);CHKERRQ(ierr);
2779   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2780   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2781 
2782   if (reuse == MAT_REUSE_MATRIX) {
2783     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2784   } else {
2785     *newmat = B;
2786   }
2787   PetscFunctionReturn(0);
2788 }
2789 EXTERN_C_END
2790 
2791 #include "src/mat/impls/aij/mpi/mpiaij.h"
2792 
2793 EXTERN_C_BEGIN
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2796 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2797 {
2798   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2799   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2800   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2801   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2802   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2803   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2804   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2805   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2806   PetscInt          rstart,cstart,*garray,ii,k;
2807   PetscErrorCode    ierr;
2808   PetscScalar       *vals,*ovals;
2809 
2810   PetscFunctionBegin;
2811   ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr);
2812   for (i=0; i<A->rmap.n/dof; i++) {
2813     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2814     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2815     for (j=0; j<dof; j++) {
2816       dnz[dof*i+j] = AIJ->ilen[i];
2817       onz[dof*i+j] = OAIJ->ilen[i];
2818     }
2819   }
2820   ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2821   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2822 
2823   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2824   rstart = dof*maij->A->rmap.rstart;
2825   cstart = dof*maij->A->cmap.rstart;
2826   garray = mpiaij->garray;
2827 
2828   ii = rstart;
2829   for (i=0; i<A->rmap.n/dof; i++) {
2830     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2831     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2832     for (j=0; j<dof; j++) {
2833       for (k=0; k<ncols; k++) {
2834         icols[k] = cstart + dof*cols[k]+j;
2835       }
2836       for (k=0; k<oncols; k++) {
2837         oicols[k] = dof*garray[ocols[k]]+j;
2838       }
2839       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2840       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2841       ii++;
2842     }
2843     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2844     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2845   }
2846   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2847 
2848   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2849   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2850 
2851   if (reuse == MAT_REUSE_MATRIX) {
2852     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2853     ((PetscObject)A)->refct = 1;
2854     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2855     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2856   } else {
2857     *newmat = B;
2858   }
2859   PetscFunctionReturn(0);
2860 }
2861 EXTERN_C_END
2862 
2863 
2864 /* ---------------------------------------------------------------------------------- */
2865 /*MC
2866   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2867   operations for multicomponent problems.  It interpolates each component the same
2868   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2869   and MATMPIAIJ for distributed matrices.
2870 
2871   Operations provided:
2872 + MatMult
2873 . MatMultTranspose
2874 . MatMultAdd
2875 . MatMultTransposeAdd
2876 - MatView
2877 
2878   Level: advanced
2879 
2880 M*/
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatCreateMAIJ"
2883 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2884 {
2885   PetscErrorCode ierr;
2886   PetscMPIInt    size;
2887   PetscInt       n;
2888   Mat_MPIMAIJ    *b;
2889   Mat            B;
2890 
2891   PetscFunctionBegin;
2892   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2893 
2894   if (dof == 1) {
2895     *maij = A;
2896   } else {
2897     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2898     ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr);
2899     B->assembled    = PETSC_TRUE;
2900 
2901     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2902     if (size == 1) {
2903       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2904       B->ops->destroy = MatDestroy_SeqMAIJ;
2905       B->ops->view    = MatView_SeqMAIJ;
2906       b      = (Mat_MPIMAIJ*)B->data;
2907       b->dof = dof;
2908       b->AIJ = A;
2909       if (dof == 2) {
2910         B->ops->mult             = MatMult_SeqMAIJ_2;
2911         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2912         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2913         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2914       } else if (dof == 3) {
2915         B->ops->mult             = MatMult_SeqMAIJ_3;
2916         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2917         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2918         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2919       } else if (dof == 4) {
2920         B->ops->mult             = MatMult_SeqMAIJ_4;
2921         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2922         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2923         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2924       } else if (dof == 5) {
2925         B->ops->mult             = MatMult_SeqMAIJ_5;
2926         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2927         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2928         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2929       } else if (dof == 6) {
2930         B->ops->mult             = MatMult_SeqMAIJ_6;
2931         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2932         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2933         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2934       } else if (dof == 7) {
2935         B->ops->mult             = MatMult_SeqMAIJ_7;
2936         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2937         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2938         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2939       } else if (dof == 8) {
2940         B->ops->mult             = MatMult_SeqMAIJ_8;
2941         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2942         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2943         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2944       } else if (dof == 9) {
2945         B->ops->mult             = MatMult_SeqMAIJ_9;
2946         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2947         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2948         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2949       } else if (dof == 10) {
2950         B->ops->mult             = MatMult_SeqMAIJ_10;
2951         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2952         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2953         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2954       } else if (dof == 16) {
2955         B->ops->mult             = MatMult_SeqMAIJ_16;
2956         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2957         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2958         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2959       } else if (dof == 18) {
2960         B->ops->mult             = MatMult_SeqMAIJ_18;
2961         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
2962         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
2963         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
2964       } else {
2965         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2966       }
2967       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2968       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2969       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2970     } else {
2971       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2972       IS         from,to;
2973       Vec        gvec;
2974       PetscInt   *garray,i;
2975 
2976       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2977       B->ops->destroy = MatDestroy_MPIMAIJ;
2978       B->ops->view    = MatView_MPIMAIJ;
2979       b      = (Mat_MPIMAIJ*)B->data;
2980       b->dof = dof;
2981       b->A   = A;
2982       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
2983       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
2984 
2985       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2986       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
2987       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2988 
2989       /* create two temporary Index sets for build scatter gather */
2990       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2991       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2992       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr);
2993       ierr = PetscFree(garray);CHKERRQ(ierr);
2994       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2995 
2996       /* create temporary global vector to generate scatter context */
2997       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,PETSC_NULL,&gvec);CHKERRQ(ierr);
2998       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2999 
3000       /* generate the scatter context */
3001       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3002 
3003       ierr = ISDestroy(from);CHKERRQ(ierr);
3004       ierr = ISDestroy(to);CHKERRQ(ierr);
3005       ierr = VecDestroy(gvec);CHKERRQ(ierr);
3006 
3007       B->ops->mult             = MatMult_MPIMAIJ_dof;
3008       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3009       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3010       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3011       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3012       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3013       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3014     }
3015     *maij = B;
3016     ierr = MatView_Private(B);CHKERRQ(ierr);
3017   }
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 
3022 
3023 
3024 
3025 
3026 
3027 
3028 
3029 
3030 
3031 
3032