xref: /petsc/src/mat/impls/maij/maij.c (revision 22612f2f7cceb60caedd65384cdf99fc989f2aeb)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "src/mat/impls/maij/maij.h"
21 #include "src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27 {
28   PetscErrorCode ierr;
29   PetscTruth     ismpimaij,isseqmaij;
30 
31   PetscFunctionBegin;
32   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34   if (ismpimaij) {
35     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36 
37     *B = b->A;
38   } else if (isseqmaij) {
39     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40 
41     *B = b->AIJ;
42   } else {
43     *B = A;
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMAIJRedimension"
50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51 {
52   PetscErrorCode ierr;
53   Mat            Aij;
54 
55   PetscFunctionBegin;
56   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqMAIJ"
63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
67 
68   PetscFunctionBegin;
69   if (b->AIJ) {
70     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
71   }
72   ierr = PetscFree(b);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatView_SeqMAIJ"
78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79 {
80   PetscErrorCode ierr;
81   Mat            B;
82 
83   PetscFunctionBegin;
84   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85   ierr = MatView(B,viewer);CHKERRQ(ierr);
86   ierr = MatDestroy(B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatView_MPIMAIJ"
92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93 {
94   PetscErrorCode ierr;
95   Mat            B;
96 
97   PetscFunctionBegin;
98   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99   ierr = MatView(B,viewer);CHKERRQ(ierr);
100   ierr = MatDestroy(B);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatDestroy_MPIMAIJ"
106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107 {
108   PetscErrorCode ierr;
109   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
110 
111   PetscFunctionBegin;
112   if (b->AIJ) {
113     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
114   }
115   if (b->OAIJ) {
116     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117   }
118   if (b->A) {
119     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120   }
121   if (b->ctx) {
122     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123   }
124   if (b->w) {
125     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126   }
127   ierr = PetscFree(b);CHKERRQ(ierr);
128   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 /*MC
133   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134   multicomponent problems, interpolating or restricting each component the same way independently.
135   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
136 
137   Operations provided:
138 . MatMult
139 . MatMultTranspose
140 . MatMultAdd
141 . MatMultTransposeAdd
142 
143   Level: advanced
144 
145 .seealso: MatCreateSeqDense
146 M*/
147 
148 EXTERN_C_BEGIN
149 #undef __FUNCT__
150 #define __FUNCT__ "MatCreate_MAIJ"
151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
152 {
153   PetscErrorCode ierr;
154   Mat_MPIMAIJ    *b;
155   PetscMPIInt    size;
156 
157   PetscFunctionBegin;
158   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
159   A->data  = (void*)b;
160   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
161   A->factor           = 0;
162   A->mapping          = 0;
163 
164   b->AIJ  = 0;
165   b->dof  = 0;
166   b->OAIJ = 0;
167   b->ctx  = 0;
168   b->w    = 0;
169   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
170   if (size == 1){
171     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
172   } else {
173     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
174   }
175   PetscFunctionReturn(0);
176 }
177 EXTERN_C_END
178 
179 /* --------------------------------------------------------------------------------------*/
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMult_SeqMAIJ_2"
182 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183 {
184   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186   PetscScalar    *x,*y,*v,sum1, sum2;
187   PetscErrorCode ierr;
188   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
189   PetscInt       n,i,jrow,j;
190 
191   PetscFunctionBegin;
192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
193   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
194   idx  = a->j;
195   v    = a->a;
196   ii   = a->i;
197 
198   for (i=0; i<m; i++) {
199     jrow = ii[i];
200     n    = ii[i+1] - jrow;
201     sum1  = 0.0;
202     sum2  = 0.0;
203     for (j=0; j<n; j++) {
204       sum1 += v[jrow]*x[2*idx[jrow]];
205       sum2 += v[jrow]*x[2*idx[jrow]+1];
206       jrow++;
207      }
208     y[2*i]   = sum1;
209     y[2*i+1] = sum2;
210   }
211 
212   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
220 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221 {
222   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
223   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
224   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
225   PetscErrorCode ierr;
226   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
227 
228   PetscFunctionBegin;
229   ierr = VecSet(yy,zero);CHKERRQ(ierr);
230   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
231   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
232 
233   for (i=0; i<m; i++) {
234     idx    = a->j + a->i[i] ;
235     v      = a->a + a->i[i] ;
236     n      = a->i[i+1] - a->i[i];
237     alpha1 = x[2*i];
238     alpha2 = x[2*i+1];
239     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240   }
241   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
242   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
249 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250 {
251   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253   PetscScalar    *x,*y,*v,sum1, sum2;
254   PetscErrorCode ierr;
255   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
256   PetscInt       n,i,jrow,j;
257 
258   PetscFunctionBegin;
259   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
260   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
261   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
262   idx  = a->j;
263   v    = a->a;
264   ii   = a->i;
265 
266   for (i=0; i<m; i++) {
267     jrow = ii[i];
268     n    = ii[i+1] - jrow;
269     sum1  = 0.0;
270     sum2  = 0.0;
271     for (j=0; j<n; j++) {
272       sum1 += v[jrow]*x[2*idx[jrow]];
273       sum2 += v[jrow]*x[2*idx[jrow]+1];
274       jrow++;
275      }
276     y[2*i]   += sum1;
277     y[2*i+1] += sum2;
278   }
279 
280   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
281   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
282   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 #undef __FUNCT__
286 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
287 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288 {
289   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
290   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
291   PetscScalar    *x,*y,*v,alpha1,alpha2;
292   PetscErrorCode ierr;
293   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
294 
295   PetscFunctionBegin;
296   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
297   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
298   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
299 
300   for (i=0; i<m; i++) {
301     idx   = a->j + a->i[i] ;
302     v     = a->a + a->i[i] ;
303     n     = a->i[i+1] - a->i[i];
304     alpha1 = x[2*i];
305     alpha2 = x[2*i+1];
306     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307   }
308   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
309   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 /* --------------------------------------------------------------------------------------*/
314 #undef __FUNCT__
315 #define __FUNCT__ "MatMult_SeqMAIJ_3"
316 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317 {
318   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
319   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
320   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
321   PetscErrorCode ierr;
322   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
323   PetscInt       n,i,jrow,j;
324 
325   PetscFunctionBegin;
326   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
327   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
328   idx  = a->j;
329   v    = a->a;
330   ii   = a->i;
331 
332   for (i=0; i<m; i++) {
333     jrow = ii[i];
334     n    = ii[i+1] - jrow;
335     sum1  = 0.0;
336     sum2  = 0.0;
337     sum3  = 0.0;
338     for (j=0; j<n; j++) {
339       sum1 += v[jrow]*x[3*idx[jrow]];
340       sum2 += v[jrow]*x[3*idx[jrow]+1];
341       sum3 += v[jrow]*x[3*idx[jrow]+2];
342       jrow++;
343      }
344     y[3*i]   = sum1;
345     y[3*i+1] = sum2;
346     y[3*i+2] = sum3;
347   }
348 
349   ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr);
350   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 #undef __FUNCT__
356 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
357 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
358 {
359   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
360   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
361   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
362   PetscErrorCode ierr;
363   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
364 
365   PetscFunctionBegin;
366   ierr = VecSet(yy,zero);CHKERRQ(ierr);
367   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
368   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
369 
370   for (i=0; i<m; i++) {
371     idx    = a->j + a->i[i];
372     v      = a->a + a->i[i];
373     n      = a->i[i+1] - a->i[i];
374     alpha1 = x[3*i];
375     alpha2 = x[3*i+1];
376     alpha3 = x[3*i+2];
377     while (n-->0) {
378       y[3*(*idx)]   += alpha1*(*v);
379       y[3*(*idx)+1] += alpha2*(*v);
380       y[3*(*idx)+2] += alpha3*(*v);
381       idx++; v++;
382     }
383   }
384   ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr);
385   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
386   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
392 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393 {
394   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
395   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
396   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
397   PetscErrorCode ierr;
398   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
399   PetscInt       n,i,jrow,j;
400 
401   PetscFunctionBegin;
402   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
403   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
404   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
405   idx  = a->j;
406   v    = a->a;
407   ii   = a->i;
408 
409   for (i=0; i<m; i++) {
410     jrow = ii[i];
411     n    = ii[i+1] - jrow;
412     sum1  = 0.0;
413     sum2  = 0.0;
414     sum3  = 0.0;
415     for (j=0; j<n; j++) {
416       sum1 += v[jrow]*x[3*idx[jrow]];
417       sum2 += v[jrow]*x[3*idx[jrow]+1];
418       sum3 += v[jrow]*x[3*idx[jrow]+2];
419       jrow++;
420      }
421     y[3*i]   += sum1;
422     y[3*i+1] += sum2;
423     y[3*i+2] += sum3;
424   }
425 
426   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
427   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
428   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 #undef __FUNCT__
432 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
433 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
434 {
435   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
436   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
437   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
438   PetscErrorCode ierr;
439   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
440 
441   PetscFunctionBegin;
442   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
443   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
444   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
445   for (i=0; i<m; i++) {
446     idx    = a->j + a->i[i] ;
447     v      = a->a + a->i[i] ;
448     n      = a->i[i+1] - a->i[i];
449     alpha1 = x[3*i];
450     alpha2 = x[3*i+1];
451     alpha3 = x[3*i+2];
452     while (n-->0) {
453       y[3*(*idx)]   += alpha1*(*v);
454       y[3*(*idx)+1] += alpha2*(*v);
455       y[3*(*idx)+2] += alpha3*(*v);
456       idx++; v++;
457     }
458   }
459   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
460   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
461   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 /* ------------------------------------------------------------------------------*/
466 #undef __FUNCT__
467 #define __FUNCT__ "MatMult_SeqMAIJ_4"
468 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
469 {
470   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
471   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
472   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
473   PetscErrorCode ierr;
474   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
475   PetscInt       n,i,jrow,j;
476 
477   PetscFunctionBegin;
478   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
479   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
480   idx  = a->j;
481   v    = a->a;
482   ii   = a->i;
483 
484   for (i=0; i<m; i++) {
485     jrow = ii[i];
486     n    = ii[i+1] - jrow;
487     sum1  = 0.0;
488     sum2  = 0.0;
489     sum3  = 0.0;
490     sum4  = 0.0;
491     for (j=0; j<n; j++) {
492       sum1 += v[jrow]*x[4*idx[jrow]];
493       sum2 += v[jrow]*x[4*idx[jrow]+1];
494       sum3 += v[jrow]*x[4*idx[jrow]+2];
495       sum4 += v[jrow]*x[4*idx[jrow]+3];
496       jrow++;
497      }
498     y[4*i]   = sum1;
499     y[4*i+1] = sum2;
500     y[4*i+2] = sum3;
501     y[4*i+3] = sum4;
502   }
503 
504   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
505   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
506   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
512 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
513 {
514   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
515   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
516   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
517   PetscErrorCode ierr;
518   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
519 
520   PetscFunctionBegin;
521   ierr = VecSet(yy,zero);CHKERRQ(ierr);
522   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
523   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
524   for (i=0; i<m; i++) {
525     idx    = a->j + a->i[i] ;
526     v      = a->a + a->i[i] ;
527     n      = a->i[i+1] - a->i[i];
528     alpha1 = x[4*i];
529     alpha2 = x[4*i+1];
530     alpha3 = x[4*i+2];
531     alpha4 = x[4*i+3];
532     while (n-->0) {
533       y[4*(*idx)]   += alpha1*(*v);
534       y[4*(*idx)+1] += alpha2*(*v);
535       y[4*(*idx)+2] += alpha3*(*v);
536       y[4*(*idx)+3] += alpha4*(*v);
537       idx++; v++;
538     }
539   }
540   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
541   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
542   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
548 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
551   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
552   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
553   PetscErrorCode ierr;
554   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
555   PetscInt       n,i,jrow,j;
556 
557   PetscFunctionBegin;
558   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
559   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
560   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
561   idx  = a->j;
562   v    = a->a;
563   ii   = a->i;
564 
565   for (i=0; i<m; i++) {
566     jrow = ii[i];
567     n    = ii[i+1] - jrow;
568     sum1  = 0.0;
569     sum2  = 0.0;
570     sum3  = 0.0;
571     sum4  = 0.0;
572     for (j=0; j<n; j++) {
573       sum1 += v[jrow]*x[4*idx[jrow]];
574       sum2 += v[jrow]*x[4*idx[jrow]+1];
575       sum3 += v[jrow]*x[4*idx[jrow]+2];
576       sum4 += v[jrow]*x[4*idx[jrow]+3];
577       jrow++;
578      }
579     y[4*i]   += sum1;
580     y[4*i+1] += sum2;
581     y[4*i+2] += sum3;
582     y[4*i+3] += sum4;
583   }
584 
585   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
586   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
587   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
592 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
595   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
596   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
597   PetscErrorCode ierr;
598   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
599 
600   PetscFunctionBegin;
601   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
602   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
603   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
604 
605   for (i=0; i<m; i++) {
606     idx    = a->j + a->i[i] ;
607     v      = a->a + a->i[i] ;
608     n      = a->i[i+1] - a->i[i];
609     alpha1 = x[4*i];
610     alpha2 = x[4*i+1];
611     alpha3 = x[4*i+2];
612     alpha4 = x[4*i+3];
613     while (n-->0) {
614       y[4*(*idx)]   += alpha1*(*v);
615       y[4*(*idx)+1] += alpha2*(*v);
616       y[4*(*idx)+2] += alpha3*(*v);
617       y[4*(*idx)+3] += alpha4*(*v);
618       idx++; v++;
619     }
620   }
621   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
622   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
623   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 /* ------------------------------------------------------------------------------*/
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatMult_SeqMAIJ_5"
630 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
631 {
632   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
633   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
634   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
635   PetscErrorCode ierr;
636   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
637   PetscInt       n,i,jrow,j;
638 
639   PetscFunctionBegin;
640   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
641   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
642   idx  = a->j;
643   v    = a->a;
644   ii   = a->i;
645 
646   for (i=0; i<m; i++) {
647     jrow = ii[i];
648     n    = ii[i+1] - jrow;
649     sum1  = 0.0;
650     sum2  = 0.0;
651     sum3  = 0.0;
652     sum4  = 0.0;
653     sum5  = 0.0;
654     for (j=0; j<n; j++) {
655       sum1 += v[jrow]*x[5*idx[jrow]];
656       sum2 += v[jrow]*x[5*idx[jrow]+1];
657       sum3 += v[jrow]*x[5*idx[jrow]+2];
658       sum4 += v[jrow]*x[5*idx[jrow]+3];
659       sum5 += v[jrow]*x[5*idx[jrow]+4];
660       jrow++;
661      }
662     y[5*i]   = sum1;
663     y[5*i+1] = sum2;
664     y[5*i+2] = sum3;
665     y[5*i+3] = sum4;
666     y[5*i+4] = sum5;
667   }
668 
669   ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr);
670   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
671   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
677 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
678 {
679   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
680   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
681   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
682   PetscErrorCode ierr;
683   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
684 
685   PetscFunctionBegin;
686   ierr = VecSet(yy,zero);CHKERRQ(ierr);
687   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
688   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
689 
690   for (i=0; i<m; i++) {
691     idx    = a->j + a->i[i] ;
692     v      = a->a + a->i[i] ;
693     n      = a->i[i+1] - a->i[i];
694     alpha1 = x[5*i];
695     alpha2 = x[5*i+1];
696     alpha3 = x[5*i+2];
697     alpha4 = x[5*i+3];
698     alpha5 = x[5*i+4];
699     while (n-->0) {
700       y[5*(*idx)]   += alpha1*(*v);
701       y[5*(*idx)+1] += alpha2*(*v);
702       y[5*(*idx)+2] += alpha3*(*v);
703       y[5*(*idx)+3] += alpha4*(*v);
704       y[5*(*idx)+4] += alpha5*(*v);
705       idx++; v++;
706     }
707   }
708   ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr);
709   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
710   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
716 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
717 {
718   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
719   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
720   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
721   PetscErrorCode ierr;
722   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
723   PetscInt       n,i,jrow,j;
724 
725   PetscFunctionBegin;
726   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
727   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
728   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
729   idx  = a->j;
730   v    = a->a;
731   ii   = a->i;
732 
733   for (i=0; i<m; i++) {
734     jrow = ii[i];
735     n    = ii[i+1] - jrow;
736     sum1  = 0.0;
737     sum2  = 0.0;
738     sum3  = 0.0;
739     sum4  = 0.0;
740     sum5  = 0.0;
741     for (j=0; j<n; j++) {
742       sum1 += v[jrow]*x[5*idx[jrow]];
743       sum2 += v[jrow]*x[5*idx[jrow]+1];
744       sum3 += v[jrow]*x[5*idx[jrow]+2];
745       sum4 += v[jrow]*x[5*idx[jrow]+3];
746       sum5 += v[jrow]*x[5*idx[jrow]+4];
747       jrow++;
748      }
749     y[5*i]   += sum1;
750     y[5*i+1] += sum2;
751     y[5*i+2] += sum3;
752     y[5*i+3] += sum4;
753     y[5*i+4] += sum5;
754   }
755 
756   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
757   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
758   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
764 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
765 {
766   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
767   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
768   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
769   PetscErrorCode ierr;
770   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
771 
772   PetscFunctionBegin;
773   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
774   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
775   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
776 
777   for (i=0; i<m; i++) {
778     idx    = a->j + a->i[i] ;
779     v      = a->a + a->i[i] ;
780     n      = a->i[i+1] - a->i[i];
781     alpha1 = x[5*i];
782     alpha2 = x[5*i+1];
783     alpha3 = x[5*i+2];
784     alpha4 = x[5*i+3];
785     alpha5 = x[5*i+4];
786     while (n-->0) {
787       y[5*(*idx)]   += alpha1*(*v);
788       y[5*(*idx)+1] += alpha2*(*v);
789       y[5*(*idx)+2] += alpha3*(*v);
790       y[5*(*idx)+3] += alpha4*(*v);
791       y[5*(*idx)+4] += alpha5*(*v);
792       idx++; v++;
793     }
794   }
795   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
796   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
797   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
801 /* ------------------------------------------------------------------------------*/
802 #undef __FUNCT__
803 #define __FUNCT__ "MatMult_SeqMAIJ_6"
804 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
805 {
806   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
807   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
808   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
809   PetscErrorCode ierr;
810   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
811   PetscInt       n,i,jrow,j;
812 
813   PetscFunctionBegin;
814   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
815   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
816   idx  = a->j;
817   v    = a->a;
818   ii   = a->i;
819 
820   for (i=0; i<m; i++) {
821     jrow = ii[i];
822     n    = ii[i+1] - jrow;
823     sum1  = 0.0;
824     sum2  = 0.0;
825     sum3  = 0.0;
826     sum4  = 0.0;
827     sum5  = 0.0;
828     sum6  = 0.0;
829     for (j=0; j<n; j++) {
830       sum1 += v[jrow]*x[6*idx[jrow]];
831       sum2 += v[jrow]*x[6*idx[jrow]+1];
832       sum3 += v[jrow]*x[6*idx[jrow]+2];
833       sum4 += v[jrow]*x[6*idx[jrow]+3];
834       sum5 += v[jrow]*x[6*idx[jrow]+4];
835       sum6 += v[jrow]*x[6*idx[jrow]+5];
836       jrow++;
837      }
838     y[6*i]   = sum1;
839     y[6*i+1] = sum2;
840     y[6*i+2] = sum3;
841     y[6*i+3] = sum4;
842     y[6*i+4] = sum5;
843     y[6*i+5] = sum6;
844   }
845 
846   ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr);
847   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
848   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 
852 #undef __FUNCT__
853 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
854 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
855 {
856   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
857   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
858   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
859   PetscErrorCode ierr;
860   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
861 
862   PetscFunctionBegin;
863   ierr = VecSet(yy,zero);CHKERRQ(ierr);
864   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
865   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
866 
867   for (i=0; i<m; i++) {
868     idx    = a->j + a->i[i] ;
869     v      = a->a + a->i[i] ;
870     n      = a->i[i+1] - a->i[i];
871     alpha1 = x[6*i];
872     alpha2 = x[6*i+1];
873     alpha3 = x[6*i+2];
874     alpha4 = x[6*i+3];
875     alpha5 = x[6*i+4];
876     alpha6 = x[6*i+5];
877     while (n-->0) {
878       y[6*(*idx)]   += alpha1*(*v);
879       y[6*(*idx)+1] += alpha2*(*v);
880       y[6*(*idx)+2] += alpha3*(*v);
881       y[6*(*idx)+3] += alpha4*(*v);
882       y[6*(*idx)+4] += alpha5*(*v);
883       y[6*(*idx)+5] += alpha6*(*v);
884       idx++; v++;
885     }
886   }
887   ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr);
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
895 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
896 {
897   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
898   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
899   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
900   PetscErrorCode ierr;
901   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
902   PetscInt       n,i,jrow,j;
903 
904   PetscFunctionBegin;
905   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
906   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
908   idx  = a->j;
909   v    = a->a;
910   ii   = a->i;
911 
912   for (i=0; i<m; i++) {
913     jrow = ii[i];
914     n    = ii[i+1] - jrow;
915     sum1  = 0.0;
916     sum2  = 0.0;
917     sum3  = 0.0;
918     sum4  = 0.0;
919     sum5  = 0.0;
920     sum6  = 0.0;
921     for (j=0; j<n; j++) {
922       sum1 += v[jrow]*x[6*idx[jrow]];
923       sum2 += v[jrow]*x[6*idx[jrow]+1];
924       sum3 += v[jrow]*x[6*idx[jrow]+2];
925       sum4 += v[jrow]*x[6*idx[jrow]+3];
926       sum5 += v[jrow]*x[6*idx[jrow]+4];
927       sum6 += v[jrow]*x[6*idx[jrow]+5];
928       jrow++;
929      }
930     y[6*i]   += sum1;
931     y[6*i+1] += sum2;
932     y[6*i+2] += sum3;
933     y[6*i+3] += sum4;
934     y[6*i+4] += sum5;
935     y[6*i+5] += sum6;
936   }
937 
938   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
939   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
940   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
946 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947 {
948   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
949   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
950   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
951   PetscErrorCode ierr;
952   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
953 
954   PetscFunctionBegin;
955   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
956   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
957   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
958 
959   for (i=0; i<m; i++) {
960     idx    = a->j + a->i[i] ;
961     v      = a->a + a->i[i] ;
962     n      = a->i[i+1] - a->i[i];
963     alpha1 = x[6*i];
964     alpha2 = x[6*i+1];
965     alpha3 = x[6*i+2];
966     alpha4 = x[6*i+3];
967     alpha5 = x[6*i+4];
968     alpha6 = x[6*i+5];
969     while (n-->0) {
970       y[6*(*idx)]   += alpha1*(*v);
971       y[6*(*idx)+1] += alpha2*(*v);
972       y[6*(*idx)+2] += alpha3*(*v);
973       y[6*(*idx)+3] += alpha4*(*v);
974       y[6*(*idx)+4] += alpha5*(*v);
975       y[6*(*idx)+5] += alpha6*(*v);
976       idx++; v++;
977     }
978   }
979   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
980   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
981   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
982   PetscFunctionReturn(0);
983 }
984 
985 /* ------------------------------------------------------------------------------*/
986 #undef __FUNCT__
987 #define __FUNCT__ "MatMult_SeqMAIJ_7"
988 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989 {
990   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
991   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
992   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
993   PetscErrorCode ierr;
994   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
995   PetscInt       n,i,jrow,j;
996 
997   PetscFunctionBegin;
998   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
999   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1000   idx  = a->j;
1001   v    = a->a;
1002   ii   = a->i;
1003 
1004   for (i=0; i<m; i++) {
1005     jrow = ii[i];
1006     n    = ii[i+1] - jrow;
1007     sum1  = 0.0;
1008     sum2  = 0.0;
1009     sum3  = 0.0;
1010     sum4  = 0.0;
1011     sum5  = 0.0;
1012     sum6  = 0.0;
1013     sum7  = 0.0;
1014     for (j=0; j<n; j++) {
1015       sum1 += v[jrow]*x[7*idx[jrow]];
1016       sum2 += v[jrow]*x[7*idx[jrow]+1];
1017       sum3 += v[jrow]*x[7*idx[jrow]+2];
1018       sum4 += v[jrow]*x[7*idx[jrow]+3];
1019       sum5 += v[jrow]*x[7*idx[jrow]+4];
1020       sum6 += v[jrow]*x[7*idx[jrow]+5];
1021       sum7 += v[jrow]*x[7*idx[jrow]+6];
1022       jrow++;
1023      }
1024     y[7*i]   = sum1;
1025     y[7*i+1] = sum2;
1026     y[7*i+2] = sum3;
1027     y[7*i+3] = sum4;
1028     y[7*i+4] = sum5;
1029     y[7*i+5] = sum6;
1030     y[7*i+6] = sum7;
1031   }
1032 
1033   ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr);
1034   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1035   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1041 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042 {
1043   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1044   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1045   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1046   PetscErrorCode ierr;
1047   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1048 
1049   PetscFunctionBegin;
1050   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1051   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1052   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1053 
1054   for (i=0; i<m; i++) {
1055     idx    = a->j + a->i[i] ;
1056     v      = a->a + a->i[i] ;
1057     n      = a->i[i+1] - a->i[i];
1058     alpha1 = x[7*i];
1059     alpha2 = x[7*i+1];
1060     alpha3 = x[7*i+2];
1061     alpha4 = x[7*i+3];
1062     alpha5 = x[7*i+4];
1063     alpha6 = x[7*i+5];
1064     alpha7 = x[7*i+6];
1065     while (n-->0) {
1066       y[7*(*idx)]   += alpha1*(*v);
1067       y[7*(*idx)+1] += alpha2*(*v);
1068       y[7*(*idx)+2] += alpha3*(*v);
1069       y[7*(*idx)+3] += alpha4*(*v);
1070       y[7*(*idx)+4] += alpha5*(*v);
1071       y[7*(*idx)+5] += alpha6*(*v);
1072       y[7*(*idx)+6] += alpha7*(*v);
1073       idx++; v++;
1074     }
1075   }
1076   ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr);
1077   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1078   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1084 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1085 {
1086   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1087   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1088   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1089   PetscErrorCode ierr;
1090   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1091   PetscInt       n,i,jrow,j;
1092 
1093   PetscFunctionBegin;
1094   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1095   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1096   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1097   idx  = a->j;
1098   v    = a->a;
1099   ii   = a->i;
1100 
1101   for (i=0; i<m; i++) {
1102     jrow = ii[i];
1103     n    = ii[i+1] - jrow;
1104     sum1  = 0.0;
1105     sum2  = 0.0;
1106     sum3  = 0.0;
1107     sum4  = 0.0;
1108     sum5  = 0.0;
1109     sum6  = 0.0;
1110     sum7  = 0.0;
1111     for (j=0; j<n; j++) {
1112       sum1 += v[jrow]*x[7*idx[jrow]];
1113       sum2 += v[jrow]*x[7*idx[jrow]+1];
1114       sum3 += v[jrow]*x[7*idx[jrow]+2];
1115       sum4 += v[jrow]*x[7*idx[jrow]+3];
1116       sum5 += v[jrow]*x[7*idx[jrow]+4];
1117       sum6 += v[jrow]*x[7*idx[jrow]+5];
1118       sum7 += v[jrow]*x[7*idx[jrow]+6];
1119       jrow++;
1120      }
1121     y[7*i]   += sum1;
1122     y[7*i+1] += sum2;
1123     y[7*i+2] += sum3;
1124     y[7*i+3] += sum4;
1125     y[7*i+4] += sum5;
1126     y[7*i+5] += sum6;
1127     y[7*i+6] += sum7;
1128   }
1129 
1130   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1131   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1132   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1138 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1139 {
1140   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1141   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1142   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1143   PetscErrorCode ierr;
1144   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1145 
1146   PetscFunctionBegin;
1147   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1148   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1149   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1150   for (i=0; i<m; i++) {
1151     idx    = a->j + a->i[i] ;
1152     v      = a->a + a->i[i] ;
1153     n      = a->i[i+1] - a->i[i];
1154     alpha1 = x[7*i];
1155     alpha2 = x[7*i+1];
1156     alpha3 = x[7*i+2];
1157     alpha4 = x[7*i+3];
1158     alpha5 = x[7*i+4];
1159     alpha6 = x[7*i+5];
1160     alpha7 = x[7*i+6];
1161     while (n-->0) {
1162       y[7*(*idx)]   += alpha1*(*v);
1163       y[7*(*idx)+1] += alpha2*(*v);
1164       y[7*(*idx)+2] += alpha3*(*v);
1165       y[7*(*idx)+3] += alpha4*(*v);
1166       y[7*(*idx)+4] += alpha5*(*v);
1167       y[7*(*idx)+5] += alpha6*(*v);
1168       y[7*(*idx)+6] += alpha7*(*v);
1169       idx++; v++;
1170     }
1171   }
1172   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1173   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1174   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1180 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1181 {
1182   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1183   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1184   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1185   PetscErrorCode ierr;
1186   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1187   PetscInt       n,i,jrow,j;
1188 
1189   PetscFunctionBegin;
1190   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1191   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1192   idx  = a->j;
1193   v    = a->a;
1194   ii   = a->i;
1195 
1196   for (i=0; i<m; i++) {
1197     jrow = ii[i];
1198     n    = ii[i+1] - jrow;
1199     sum1  = 0.0;
1200     sum2  = 0.0;
1201     sum3  = 0.0;
1202     sum4  = 0.0;
1203     sum5  = 0.0;
1204     sum6  = 0.0;
1205     sum7  = 0.0;
1206     sum8  = 0.0;
1207     for (j=0; j<n; j++) {
1208       sum1 += v[jrow]*x[8*idx[jrow]];
1209       sum2 += v[jrow]*x[8*idx[jrow]+1];
1210       sum3 += v[jrow]*x[8*idx[jrow]+2];
1211       sum4 += v[jrow]*x[8*idx[jrow]+3];
1212       sum5 += v[jrow]*x[8*idx[jrow]+4];
1213       sum6 += v[jrow]*x[8*idx[jrow]+5];
1214       sum7 += v[jrow]*x[8*idx[jrow]+6];
1215       sum8 += v[jrow]*x[8*idx[jrow]+7];
1216       jrow++;
1217      }
1218     y[8*i]   = sum1;
1219     y[8*i+1] = sum2;
1220     y[8*i+2] = sum3;
1221     y[8*i+3] = sum4;
1222     y[8*i+4] = sum5;
1223     y[8*i+5] = sum6;
1224     y[8*i+6] = sum7;
1225     y[8*i+7] = sum8;
1226   }
1227 
1228   ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr);
1229   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1230   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1236 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1237 {
1238   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1239   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1240   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1241   PetscErrorCode ierr;
1242   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1243 
1244   PetscFunctionBegin;
1245   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1246   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1247   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1248 
1249   for (i=0; i<m; i++) {
1250     idx    = a->j + a->i[i] ;
1251     v      = a->a + a->i[i] ;
1252     n      = a->i[i+1] - a->i[i];
1253     alpha1 = x[8*i];
1254     alpha2 = x[8*i+1];
1255     alpha3 = x[8*i+2];
1256     alpha4 = x[8*i+3];
1257     alpha5 = x[8*i+4];
1258     alpha6 = x[8*i+5];
1259     alpha7 = x[8*i+6];
1260     alpha8 = x[8*i+7];
1261     while (n-->0) {
1262       y[8*(*idx)]   += alpha1*(*v);
1263       y[8*(*idx)+1] += alpha2*(*v);
1264       y[8*(*idx)+2] += alpha3*(*v);
1265       y[8*(*idx)+3] += alpha4*(*v);
1266       y[8*(*idx)+4] += alpha5*(*v);
1267       y[8*(*idx)+5] += alpha6*(*v);
1268       y[8*(*idx)+6] += alpha7*(*v);
1269       y[8*(*idx)+7] += alpha8*(*v);
1270       idx++; v++;
1271     }
1272   }
1273   ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr);
1274   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1275   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1281 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1282 {
1283   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1284   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1285   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1286   PetscErrorCode ierr;
1287   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1288   PetscInt       n,i,jrow,j;
1289 
1290   PetscFunctionBegin;
1291   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1292   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1293   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1294   idx  = a->j;
1295   v    = a->a;
1296   ii   = a->i;
1297 
1298   for (i=0; i<m; i++) {
1299     jrow = ii[i];
1300     n    = ii[i+1] - jrow;
1301     sum1  = 0.0;
1302     sum2  = 0.0;
1303     sum3  = 0.0;
1304     sum4  = 0.0;
1305     sum5  = 0.0;
1306     sum6  = 0.0;
1307     sum7  = 0.0;
1308     sum8  = 0.0;
1309     for (j=0; j<n; j++) {
1310       sum1 += v[jrow]*x[8*idx[jrow]];
1311       sum2 += v[jrow]*x[8*idx[jrow]+1];
1312       sum3 += v[jrow]*x[8*idx[jrow]+2];
1313       sum4 += v[jrow]*x[8*idx[jrow]+3];
1314       sum5 += v[jrow]*x[8*idx[jrow]+4];
1315       sum6 += v[jrow]*x[8*idx[jrow]+5];
1316       sum7 += v[jrow]*x[8*idx[jrow]+6];
1317       sum8 += v[jrow]*x[8*idx[jrow]+7];
1318       jrow++;
1319      }
1320     y[8*i]   += sum1;
1321     y[8*i+1] += sum2;
1322     y[8*i+2] += sum3;
1323     y[8*i+3] += sum4;
1324     y[8*i+4] += sum5;
1325     y[8*i+5] += sum6;
1326     y[8*i+6] += sum7;
1327     y[8*i+7] += sum8;
1328   }
1329 
1330   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1331   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1332   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1338 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1339 {
1340   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1341   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1342   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1343   PetscErrorCode ierr;
1344   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1345 
1346   PetscFunctionBegin;
1347   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1348   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1349   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1350   for (i=0; i<m; i++) {
1351     idx    = a->j + a->i[i] ;
1352     v      = a->a + a->i[i] ;
1353     n      = a->i[i+1] - a->i[i];
1354     alpha1 = x[8*i];
1355     alpha2 = x[8*i+1];
1356     alpha3 = x[8*i+2];
1357     alpha4 = x[8*i+3];
1358     alpha5 = x[8*i+4];
1359     alpha6 = x[8*i+5];
1360     alpha7 = x[8*i+6];
1361     alpha8 = x[8*i+7];
1362     while (n-->0) {
1363       y[8*(*idx)]   += alpha1*(*v);
1364       y[8*(*idx)+1] += alpha2*(*v);
1365       y[8*(*idx)+2] += alpha3*(*v);
1366       y[8*(*idx)+3] += alpha4*(*v);
1367       y[8*(*idx)+4] += alpha5*(*v);
1368       y[8*(*idx)+5] += alpha6*(*v);
1369       y[8*(*idx)+6] += alpha7*(*v);
1370       y[8*(*idx)+7] += alpha8*(*v);
1371       idx++; v++;
1372     }
1373   }
1374   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1375   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1376   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 /* ------------------------------------------------------------------------------*/
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1383 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1384 {
1385   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1386   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1387   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1388   PetscErrorCode ierr;
1389   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1390   PetscInt       n,i,jrow,j;
1391 
1392   PetscFunctionBegin;
1393   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1394   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1395   idx  = a->j;
1396   v    = a->a;
1397   ii   = a->i;
1398 
1399   for (i=0; i<m; i++) {
1400     jrow = ii[i];
1401     n    = ii[i+1] - jrow;
1402     sum1  = 0.0;
1403     sum2  = 0.0;
1404     sum3  = 0.0;
1405     sum4  = 0.0;
1406     sum5  = 0.0;
1407     sum6  = 0.0;
1408     sum7  = 0.0;
1409     sum8  = 0.0;
1410     sum9  = 0.0;
1411     for (j=0; j<n; j++) {
1412       sum1 += v[jrow]*x[9*idx[jrow]];
1413       sum2 += v[jrow]*x[9*idx[jrow]+1];
1414       sum3 += v[jrow]*x[9*idx[jrow]+2];
1415       sum4 += v[jrow]*x[9*idx[jrow]+3];
1416       sum5 += v[jrow]*x[9*idx[jrow]+4];
1417       sum6 += v[jrow]*x[9*idx[jrow]+5];
1418       sum7 += v[jrow]*x[9*idx[jrow]+6];
1419       sum8 += v[jrow]*x[9*idx[jrow]+7];
1420       sum9 += v[jrow]*x[9*idx[jrow]+8];
1421       jrow++;
1422      }
1423     y[9*i]   = sum1;
1424     y[9*i+1] = sum2;
1425     y[9*i+2] = sum3;
1426     y[9*i+3] = sum4;
1427     y[9*i+4] = sum5;
1428     y[9*i+5] = sum6;
1429     y[9*i+6] = sum7;
1430     y[9*i+7] = sum8;
1431     y[9*i+8] = sum9;
1432   }
1433 
1434   ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr);
1435   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1436   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 /* ------------------------------------------------------------------------------*/
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1444 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1445 {
1446   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1447   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1448   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1449   PetscErrorCode ierr;
1450   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1451 
1452   PetscFunctionBegin;
1453   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1455   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1456 
1457   for (i=0; i<m; i++) {
1458     idx    = a->j + a->i[i] ;
1459     v      = a->a + a->i[i] ;
1460     n      = a->i[i+1] - a->i[i];
1461     alpha1 = x[9*i];
1462     alpha2 = x[9*i+1];
1463     alpha3 = x[9*i+2];
1464     alpha4 = x[9*i+3];
1465     alpha5 = x[9*i+4];
1466     alpha6 = x[9*i+5];
1467     alpha7 = x[9*i+6];
1468     alpha8 = x[9*i+7];
1469     alpha9 = x[9*i+8];
1470     while (n-->0) {
1471       y[9*(*idx)]   += alpha1*(*v);
1472       y[9*(*idx)+1] += alpha2*(*v);
1473       y[9*(*idx)+2] += alpha3*(*v);
1474       y[9*(*idx)+3] += alpha4*(*v);
1475       y[9*(*idx)+4] += alpha5*(*v);
1476       y[9*(*idx)+5] += alpha6*(*v);
1477       y[9*(*idx)+6] += alpha7*(*v);
1478       y[9*(*idx)+7] += alpha8*(*v);
1479       y[9*(*idx)+8] += alpha9*(*v);
1480       idx++; v++;
1481     }
1482   }
1483   ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr);
1484   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1485   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1491 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1492 {
1493   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1494   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1495   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1496   PetscErrorCode ierr;
1497   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1498   PetscInt       n,i,jrow,j;
1499 
1500   PetscFunctionBegin;
1501   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1502   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1503   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1504   idx  = a->j;
1505   v    = a->a;
1506   ii   = a->i;
1507 
1508   for (i=0; i<m; i++) {
1509     jrow = ii[i];
1510     n    = ii[i+1] - jrow;
1511     sum1  = 0.0;
1512     sum2  = 0.0;
1513     sum3  = 0.0;
1514     sum4  = 0.0;
1515     sum5  = 0.0;
1516     sum6  = 0.0;
1517     sum7  = 0.0;
1518     sum8  = 0.0;
1519     sum9  = 0.0;
1520     for (j=0; j<n; j++) {
1521       sum1 += v[jrow]*x[9*idx[jrow]];
1522       sum2 += v[jrow]*x[9*idx[jrow]+1];
1523       sum3 += v[jrow]*x[9*idx[jrow]+2];
1524       sum4 += v[jrow]*x[9*idx[jrow]+3];
1525       sum5 += v[jrow]*x[9*idx[jrow]+4];
1526       sum6 += v[jrow]*x[9*idx[jrow]+5];
1527       sum7 += v[jrow]*x[9*idx[jrow]+6];
1528       sum8 += v[jrow]*x[9*idx[jrow]+7];
1529       sum9 += v[jrow]*x[9*idx[jrow]+8];
1530       jrow++;
1531      }
1532     y[9*i]   += sum1;
1533     y[9*i+1] += sum2;
1534     y[9*i+2] += sum3;
1535     y[9*i+3] += sum4;
1536     y[9*i+4] += sum5;
1537     y[9*i+5] += sum6;
1538     y[9*i+6] += sum7;
1539     y[9*i+7] += sum8;
1540     y[9*i+8] += sum9;
1541   }
1542 
1543   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1544   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1545   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1551 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1552 {
1553   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1554   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1555   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1556   PetscErrorCode ierr;
1557   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1558 
1559   PetscFunctionBegin;
1560   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1561   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1562   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1563   for (i=0; i<m; i++) {
1564     idx    = a->j + a->i[i] ;
1565     v      = a->a + a->i[i] ;
1566     n      = a->i[i+1] - a->i[i];
1567     alpha1 = x[9*i];
1568     alpha2 = x[9*i+1];
1569     alpha3 = x[9*i+2];
1570     alpha4 = x[9*i+3];
1571     alpha5 = x[9*i+4];
1572     alpha6 = x[9*i+5];
1573     alpha7 = x[9*i+6];
1574     alpha8 = x[9*i+7];
1575     alpha9 = x[9*i+8];
1576     while (n-->0) {
1577       y[9*(*idx)]   += alpha1*(*v);
1578       y[9*(*idx)+1] += alpha2*(*v);
1579       y[9*(*idx)+2] += alpha3*(*v);
1580       y[9*(*idx)+3] += alpha4*(*v);
1581       y[9*(*idx)+4] += alpha5*(*v);
1582       y[9*(*idx)+5] += alpha6*(*v);
1583       y[9*(*idx)+6] += alpha7*(*v);
1584       y[9*(*idx)+7] += alpha8*(*v);
1585       y[9*(*idx)+8] += alpha9*(*v);
1586       idx++; v++;
1587     }
1588   }
1589   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1590   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1591   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1592   PetscFunctionReturn(0);
1593 }
1594 /*--------------------------------------------------------------------------------------------*/
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1597 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1598 {
1599   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1600   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1601   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1602   PetscErrorCode ierr;
1603   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1604   PetscInt       n,i,jrow,j;
1605 
1606   PetscFunctionBegin;
1607   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1608   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1609   idx  = a->j;
1610   v    = a->a;
1611   ii   = a->i;
1612 
1613   for (i=0; i<m; i++) {
1614     jrow = ii[i];
1615     n    = ii[i+1] - jrow;
1616     sum1  = 0.0;
1617     sum2  = 0.0;
1618     sum3  = 0.0;
1619     sum4  = 0.0;
1620     sum5  = 0.0;
1621     sum6  = 0.0;
1622     sum7  = 0.0;
1623     sum8  = 0.0;
1624     sum9  = 0.0;
1625     sum10 = 0.0;
1626     for (j=0; j<n; j++) {
1627       sum1  += v[jrow]*x[10*idx[jrow]];
1628       sum2  += v[jrow]*x[10*idx[jrow]+1];
1629       sum3  += v[jrow]*x[10*idx[jrow]+2];
1630       sum4  += v[jrow]*x[10*idx[jrow]+3];
1631       sum5  += v[jrow]*x[10*idx[jrow]+4];
1632       sum6  += v[jrow]*x[10*idx[jrow]+5];
1633       sum7  += v[jrow]*x[10*idx[jrow]+6];
1634       sum8  += v[jrow]*x[10*idx[jrow]+7];
1635       sum9  += v[jrow]*x[10*idx[jrow]+8];
1636       sum10 += v[jrow]*x[10*idx[jrow]+9];
1637       jrow++;
1638      }
1639     y[10*i]   = sum1;
1640     y[10*i+1] = sum2;
1641     y[10*i+2] = sum3;
1642     y[10*i+3] = sum4;
1643     y[10*i+4] = sum5;
1644     y[10*i+5] = sum6;
1645     y[10*i+6] = sum7;
1646     y[10*i+7] = sum8;
1647     y[10*i+8] = sum9;
1648     y[10*i+9] = sum10;
1649   }
1650 
1651   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1652   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1653   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1659 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1660 {
1661   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1662   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1663   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1664   PetscErrorCode ierr;
1665   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1666   PetscInt       n,i,jrow,j;
1667 
1668   PetscFunctionBegin;
1669   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1670   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1671   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1672   idx  = a->j;
1673   v    = a->a;
1674   ii   = a->i;
1675 
1676   for (i=0; i<m; i++) {
1677     jrow = ii[i];
1678     n    = ii[i+1] - jrow;
1679     sum1  = 0.0;
1680     sum2  = 0.0;
1681     sum3  = 0.0;
1682     sum4  = 0.0;
1683     sum5  = 0.0;
1684     sum6  = 0.0;
1685     sum7  = 0.0;
1686     sum8  = 0.0;
1687     sum9  = 0.0;
1688     sum10 = 0.0;
1689     for (j=0; j<n; j++) {
1690       sum1  += v[jrow]*x[10*idx[jrow]];
1691       sum2  += v[jrow]*x[10*idx[jrow]+1];
1692       sum3  += v[jrow]*x[10*idx[jrow]+2];
1693       sum4  += v[jrow]*x[10*idx[jrow]+3];
1694       sum5  += v[jrow]*x[10*idx[jrow]+4];
1695       sum6  += v[jrow]*x[10*idx[jrow]+5];
1696       sum7  += v[jrow]*x[10*idx[jrow]+6];
1697       sum8  += v[jrow]*x[10*idx[jrow]+7];
1698       sum9  += v[jrow]*x[10*idx[jrow]+8];
1699       sum10 += v[jrow]*x[10*idx[jrow]+9];
1700       jrow++;
1701      }
1702     y[10*i]   += sum1;
1703     y[10*i+1] += sum2;
1704     y[10*i+2] += sum3;
1705     y[10*i+3] += sum4;
1706     y[10*i+4] += sum5;
1707     y[10*i+5] += sum6;
1708     y[10*i+6] += sum7;
1709     y[10*i+7] += sum8;
1710     y[10*i+8] += sum9;
1711     y[10*i+9] += sum10;
1712   }
1713 
1714   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1715   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1716   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1722 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1723 {
1724   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1725   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1726   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1727   PetscErrorCode ierr;
1728   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1729 
1730   PetscFunctionBegin;
1731   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1732   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1733   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1734 
1735   for (i=0; i<m; i++) {
1736     idx    = a->j + a->i[i] ;
1737     v      = a->a + a->i[i] ;
1738     n      = a->i[i+1] - a->i[i];
1739     alpha1 = x[10*i];
1740     alpha2 = x[10*i+1];
1741     alpha3 = x[10*i+2];
1742     alpha4 = x[10*i+3];
1743     alpha5 = x[10*i+4];
1744     alpha6 = x[10*i+5];
1745     alpha7 = x[10*i+6];
1746     alpha8 = x[10*i+7];
1747     alpha9 = x[10*i+8];
1748     alpha10 = x[10*i+9];
1749     while (n-->0) {
1750       y[10*(*idx)]   += alpha1*(*v);
1751       y[10*(*idx)+1] += alpha2*(*v);
1752       y[10*(*idx)+2] += alpha3*(*v);
1753       y[10*(*idx)+3] += alpha4*(*v);
1754       y[10*(*idx)+4] += alpha5*(*v);
1755       y[10*(*idx)+5] += alpha6*(*v);
1756       y[10*(*idx)+6] += alpha7*(*v);
1757       y[10*(*idx)+7] += alpha8*(*v);
1758       y[10*(*idx)+8] += alpha9*(*v);
1759       y[10*(*idx)+9] += alpha10*(*v);
1760       idx++; v++;
1761     }
1762   }
1763   ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr);
1764   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1765   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1771 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1772 {
1773   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1774   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1775   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1776   PetscErrorCode ierr;
1777   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1778 
1779   PetscFunctionBegin;
1780   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1781   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1782   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1783   for (i=0; i<m; i++) {
1784     idx    = a->j + a->i[i] ;
1785     v      = a->a + a->i[i] ;
1786     n      = a->i[i+1] - a->i[i];
1787     alpha1 = x[10*i];
1788     alpha2 = x[10*i+1];
1789     alpha3 = x[10*i+2];
1790     alpha4 = x[10*i+3];
1791     alpha5 = x[10*i+4];
1792     alpha6 = x[10*i+5];
1793     alpha7 = x[10*i+6];
1794     alpha8 = x[10*i+7];
1795     alpha9 = x[10*i+8];
1796     alpha10 = x[10*i+9];
1797     while (n-->0) {
1798       y[10*(*idx)]   += alpha1*(*v);
1799       y[10*(*idx)+1] += alpha2*(*v);
1800       y[10*(*idx)+2] += alpha3*(*v);
1801       y[10*(*idx)+3] += alpha4*(*v);
1802       y[10*(*idx)+4] += alpha5*(*v);
1803       y[10*(*idx)+5] += alpha6*(*v);
1804       y[10*(*idx)+6] += alpha7*(*v);
1805       y[10*(*idx)+7] += alpha8*(*v);
1806       y[10*(*idx)+8] += alpha9*(*v);
1807       y[10*(*idx)+9] += alpha10*(*v);
1808       idx++; v++;
1809     }
1810   }
1811   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1812   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1813   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 
1818 /*--------------------------------------------------------------------------------------------*/
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1821 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1822 {
1823   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1824   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1825   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1826   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1827   PetscErrorCode ierr;
1828   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1829   PetscInt       n,i,jrow,j;
1830 
1831   PetscFunctionBegin;
1832   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1833   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1834   idx  = a->j;
1835   v    = a->a;
1836   ii   = a->i;
1837 
1838   for (i=0; i<m; i++) {
1839     jrow = ii[i];
1840     n    = ii[i+1] - jrow;
1841     sum1  = 0.0;
1842     sum2  = 0.0;
1843     sum3  = 0.0;
1844     sum4  = 0.0;
1845     sum5  = 0.0;
1846     sum6  = 0.0;
1847     sum7  = 0.0;
1848     sum8  = 0.0;
1849     sum9  = 0.0;
1850     sum10 = 0.0;
1851     sum11 = 0.0;
1852     sum12 = 0.0;
1853     sum13 = 0.0;
1854     sum14 = 0.0;
1855     sum15 = 0.0;
1856     sum16 = 0.0;
1857     for (j=0; j<n; j++) {
1858       sum1  += v[jrow]*x[16*idx[jrow]];
1859       sum2  += v[jrow]*x[16*idx[jrow]+1];
1860       sum3  += v[jrow]*x[16*idx[jrow]+2];
1861       sum4  += v[jrow]*x[16*idx[jrow]+3];
1862       sum5  += v[jrow]*x[16*idx[jrow]+4];
1863       sum6  += v[jrow]*x[16*idx[jrow]+5];
1864       sum7  += v[jrow]*x[16*idx[jrow]+6];
1865       sum8  += v[jrow]*x[16*idx[jrow]+7];
1866       sum9  += v[jrow]*x[16*idx[jrow]+8];
1867       sum10 += v[jrow]*x[16*idx[jrow]+9];
1868       sum11 += v[jrow]*x[16*idx[jrow]+10];
1869       sum12 += v[jrow]*x[16*idx[jrow]+11];
1870       sum13 += v[jrow]*x[16*idx[jrow]+12];
1871       sum14 += v[jrow]*x[16*idx[jrow]+13];
1872       sum15 += v[jrow]*x[16*idx[jrow]+14];
1873       sum16 += v[jrow]*x[16*idx[jrow]+15];
1874       jrow++;
1875      }
1876     y[16*i]    = sum1;
1877     y[16*i+1]  = sum2;
1878     y[16*i+2]  = sum3;
1879     y[16*i+3]  = sum4;
1880     y[16*i+4]  = sum5;
1881     y[16*i+5]  = sum6;
1882     y[16*i+6]  = sum7;
1883     y[16*i+7]  = sum8;
1884     y[16*i+8]  = sum9;
1885     y[16*i+9]  = sum10;
1886     y[16*i+10] = sum11;
1887     y[16*i+11] = sum12;
1888     y[16*i+12] = sum13;
1889     y[16*i+13] = sum14;
1890     y[16*i+14] = sum15;
1891     y[16*i+15] = sum16;
1892   }
1893 
1894   ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr);
1895   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1896   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1902 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1903 {
1904   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1905   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1906   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1907   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1908   PetscErrorCode ierr;
1909   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1910 
1911   PetscFunctionBegin;
1912   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1913   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1914   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1915 
1916   for (i=0; i<m; i++) {
1917     idx    = a->j + a->i[i] ;
1918     v      = a->a + a->i[i] ;
1919     n      = a->i[i+1] - a->i[i];
1920     alpha1  = x[16*i];
1921     alpha2  = x[16*i+1];
1922     alpha3  = x[16*i+2];
1923     alpha4  = x[16*i+3];
1924     alpha5  = x[16*i+4];
1925     alpha6  = x[16*i+5];
1926     alpha7  = x[16*i+6];
1927     alpha8  = x[16*i+7];
1928     alpha9  = x[16*i+8];
1929     alpha10 = x[16*i+9];
1930     alpha11 = x[16*i+10];
1931     alpha12 = x[16*i+11];
1932     alpha13 = x[16*i+12];
1933     alpha14 = x[16*i+13];
1934     alpha15 = x[16*i+14];
1935     alpha16 = x[16*i+15];
1936     while (n-->0) {
1937       y[16*(*idx)]    += alpha1*(*v);
1938       y[16*(*idx)+1]  += alpha2*(*v);
1939       y[16*(*idx)+2]  += alpha3*(*v);
1940       y[16*(*idx)+3]  += alpha4*(*v);
1941       y[16*(*idx)+4]  += alpha5*(*v);
1942       y[16*(*idx)+5]  += alpha6*(*v);
1943       y[16*(*idx)+6]  += alpha7*(*v);
1944       y[16*(*idx)+7]  += alpha8*(*v);
1945       y[16*(*idx)+8]  += alpha9*(*v);
1946       y[16*(*idx)+9]  += alpha10*(*v);
1947       y[16*(*idx)+10] += alpha11*(*v);
1948       y[16*(*idx)+11] += alpha12*(*v);
1949       y[16*(*idx)+12] += alpha13*(*v);
1950       y[16*(*idx)+13] += alpha14*(*v);
1951       y[16*(*idx)+14] += alpha15*(*v);
1952       y[16*(*idx)+15] += alpha16*(*v);
1953       idx++; v++;
1954     }
1955   }
1956   ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr);
1957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1958   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1964 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1965 {
1966   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1967   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1968   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1969   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1970   PetscErrorCode ierr;
1971   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1972   PetscInt       n,i,jrow,j;
1973 
1974   PetscFunctionBegin;
1975   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1976   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1977   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1978   idx  = a->j;
1979   v    = a->a;
1980   ii   = a->i;
1981 
1982   for (i=0; i<m; i++) {
1983     jrow = ii[i];
1984     n    = ii[i+1] - jrow;
1985     sum1  = 0.0;
1986     sum2  = 0.0;
1987     sum3  = 0.0;
1988     sum4  = 0.0;
1989     sum5  = 0.0;
1990     sum6  = 0.0;
1991     sum7  = 0.0;
1992     sum8  = 0.0;
1993     sum9  = 0.0;
1994     sum10 = 0.0;
1995     sum11 = 0.0;
1996     sum12 = 0.0;
1997     sum13 = 0.0;
1998     sum14 = 0.0;
1999     sum15 = 0.0;
2000     sum16 = 0.0;
2001     for (j=0; j<n; j++) {
2002       sum1  += v[jrow]*x[16*idx[jrow]];
2003       sum2  += v[jrow]*x[16*idx[jrow]+1];
2004       sum3  += v[jrow]*x[16*idx[jrow]+2];
2005       sum4  += v[jrow]*x[16*idx[jrow]+3];
2006       sum5  += v[jrow]*x[16*idx[jrow]+4];
2007       sum6  += v[jrow]*x[16*idx[jrow]+5];
2008       sum7  += v[jrow]*x[16*idx[jrow]+6];
2009       sum8  += v[jrow]*x[16*idx[jrow]+7];
2010       sum9  += v[jrow]*x[16*idx[jrow]+8];
2011       sum10 += v[jrow]*x[16*idx[jrow]+9];
2012       sum11 += v[jrow]*x[16*idx[jrow]+10];
2013       sum12 += v[jrow]*x[16*idx[jrow]+11];
2014       sum13 += v[jrow]*x[16*idx[jrow]+12];
2015       sum14 += v[jrow]*x[16*idx[jrow]+13];
2016       sum15 += v[jrow]*x[16*idx[jrow]+14];
2017       sum16 += v[jrow]*x[16*idx[jrow]+15];
2018       jrow++;
2019      }
2020     y[16*i]    += sum1;
2021     y[16*i+1]  += sum2;
2022     y[16*i+2]  += sum3;
2023     y[16*i+3]  += sum4;
2024     y[16*i+4]  += sum5;
2025     y[16*i+5]  += sum6;
2026     y[16*i+6]  += sum7;
2027     y[16*i+7]  += sum8;
2028     y[16*i+8]  += sum9;
2029     y[16*i+9]  += sum10;
2030     y[16*i+10] += sum11;
2031     y[16*i+11] += sum12;
2032     y[16*i+12] += sum13;
2033     y[16*i+13] += sum14;
2034     y[16*i+14] += sum15;
2035     y[16*i+15] += sum16;
2036   }
2037 
2038   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2039   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2040   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2046 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2047 {
2048   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2049   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2050   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2051   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2052   PetscErrorCode ierr;
2053   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2054 
2055   PetscFunctionBegin;
2056   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2057   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2058   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2059   for (i=0; i<m; i++) {
2060     idx    = a->j + a->i[i] ;
2061     v      = a->a + a->i[i] ;
2062     n      = a->i[i+1] - a->i[i];
2063     alpha1 = x[16*i];
2064     alpha2 = x[16*i+1];
2065     alpha3 = x[16*i+2];
2066     alpha4 = x[16*i+3];
2067     alpha5 = x[16*i+4];
2068     alpha6 = x[16*i+5];
2069     alpha7 = x[16*i+6];
2070     alpha8 = x[16*i+7];
2071     alpha9  = x[16*i+8];
2072     alpha10 = x[16*i+9];
2073     alpha11 = x[16*i+10];
2074     alpha12 = x[16*i+11];
2075     alpha13 = x[16*i+12];
2076     alpha14 = x[16*i+13];
2077     alpha15 = x[16*i+14];
2078     alpha16 = x[16*i+15];
2079     while (n-->0) {
2080       y[16*(*idx)]   += alpha1*(*v);
2081       y[16*(*idx)+1] += alpha2*(*v);
2082       y[16*(*idx)+2] += alpha3*(*v);
2083       y[16*(*idx)+3] += alpha4*(*v);
2084       y[16*(*idx)+4] += alpha5*(*v);
2085       y[16*(*idx)+5] += alpha6*(*v);
2086       y[16*(*idx)+6] += alpha7*(*v);
2087       y[16*(*idx)+7] += alpha8*(*v);
2088       y[16*(*idx)+8]  += alpha9*(*v);
2089       y[16*(*idx)+9]  += alpha10*(*v);
2090       y[16*(*idx)+10] += alpha11*(*v);
2091       y[16*(*idx)+11] += alpha12*(*v);
2092       y[16*(*idx)+12] += alpha13*(*v);
2093       y[16*(*idx)+13] += alpha14*(*v);
2094       y[16*(*idx)+14] += alpha15*(*v);
2095       y[16*(*idx)+15] += alpha16*(*v);
2096       idx++; v++;
2097     }
2098   }
2099   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2100   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2101   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 /*===================================================================================*/
2106 #undef __FUNCT__
2107 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2108 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2109 {
2110   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2111   PetscErrorCode ierr;
2112 
2113   PetscFunctionBegin;
2114   /* start the scatter */
2115   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2116   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2117   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2124 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2125 {
2126   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2131   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2132   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2133   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2139 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2140 {
2141   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   /* start the scatter */
2146   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2147   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2148   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2149   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2155 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2156 {
2157   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2162   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2163   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2164   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 /* ----------------------------------------------------------------*/
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2171 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2172 {
2173   /* This routine requires testing -- but it's getting better. */
2174   PetscErrorCode     ierr;
2175   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2176   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2177   Mat                P=pp->AIJ;
2178   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2179   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2180   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2181   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2182   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2183   MatScalar          *ca;
2184 
2185   PetscFunctionBegin;
2186   /* Start timer */
2187   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2188 
2189   /* Get ij structure of P^T */
2190   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2191 
2192   cn = pn*ppdof;
2193   /* Allocate ci array, arrays for fill computation and */
2194   /* free space for accumulating nonzero column info */
2195   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2196   ci[0] = 0;
2197 
2198   /* Work arrays for rows of P^T*A */
2199   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
2200   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
2201   ptasparserow = ptadenserow  + an;
2202   denserow     = ptasparserow + an;
2203   sparserow    = denserow     + cn;
2204 
2205   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2206   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2207   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2208   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2209   current_space = free_space;
2210 
2211   /* Determine symbolic info for each row of C: */
2212   for (i=0;i<pn;i++) {
2213     ptnzi  = pti[i+1] - pti[i];
2214     ptJ    = ptj + pti[i];
2215     for (dof=0;dof<ppdof;dof++) {
2216       ptanzi = 0;
2217       /* Determine symbolic row of PtA: */
2218       for (j=0;j<ptnzi;j++) {
2219         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2220         arow = ptJ[j]*ppdof + dof;
2221         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2222         anzj = ai[arow+1] - ai[arow];
2223         ajj  = aj + ai[arow];
2224         for (k=0;k<anzj;k++) {
2225           if (!ptadenserow[ajj[k]]) {
2226             ptadenserow[ajj[k]]    = -1;
2227             ptasparserow[ptanzi++] = ajj[k];
2228           }
2229         }
2230       }
2231       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2232       ptaj = ptasparserow;
2233       cnzi   = 0;
2234       for (j=0;j<ptanzi;j++) {
2235         /* Get offset within block of P */
2236         pshift = *ptaj%ppdof;
2237         /* Get block row of P */
2238         prow = (*ptaj++)/ppdof; /* integer division */
2239         /* P has same number of nonzeros per row as the compressed form */
2240         pnzj = pi[prow+1] - pi[prow];
2241         pjj  = pj + pi[prow];
2242         for (k=0;k<pnzj;k++) {
2243           /* Locations in C are shifted by the offset within the block */
2244           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2245           if (!denserow[pjj[k]*ppdof+pshift]) {
2246             denserow[pjj[k]*ppdof+pshift] = -1;
2247             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2248           }
2249         }
2250       }
2251 
2252       /* sort sparserow */
2253       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2254 
2255       /* If free space is not available, make more free space */
2256       /* Double the amount of total space in the list */
2257       if (current_space->local_remaining<cnzi) {
2258         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
2259       }
2260 
2261       /* Copy data into free space, and zero out denserows */
2262       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2263       current_space->array           += cnzi;
2264       current_space->local_used      += cnzi;
2265       current_space->local_remaining -= cnzi;
2266 
2267       for (j=0;j<ptanzi;j++) {
2268         ptadenserow[ptasparserow[j]] = 0;
2269       }
2270       for (j=0;j<cnzi;j++) {
2271         denserow[sparserow[j]] = 0;
2272       }
2273       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2274       /*        For now, we will recompute what is needed. */
2275       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2276     }
2277   }
2278   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2279   /* Allocate space for cj, initialize cj, and */
2280   /* destroy list of free space and other temporary array(s) */
2281   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2282   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2283   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
2284 
2285   /* Allocate space for ca */
2286   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
2287   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
2288 
2289   /* put together the new matrix */
2290   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2291 
2292   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2293   /* Since these are PETSc arrays, change flags to free them as necessary. */
2294   c          = (Mat_SeqAIJ *)((*C)->data);
2295   c->free_a  = PETSC_TRUE;
2296   c->free_ij = PETSC_TRUE;
2297   c->nonew   = 0;
2298 
2299   /* Clean up. */
2300   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2301 
2302   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
2308 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2309 {
2310   /* This routine requires testing -- first draft only */
2311   PetscErrorCode ierr;
2312   PetscInt       flops=0;
2313   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2314   Mat            P=pp->AIJ;
2315   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2316   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2317   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2318   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2319   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2320   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2321   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2322   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2323 
2324   PetscFunctionBegin;
2325   /* Allocate temporary array for storage of one row of A*P */
2326   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
2327   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
2328 
2329   apj      = (PetscInt *)(apa + cn);
2330   apjdense = apj + cn;
2331 
2332   /* Clear old values in C */
2333   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
2334 
2335   for (i=0;i<am;i++) {
2336     /* Form sparse row of A*P */
2337     anzi  = ai[i+1] - ai[i];
2338     apnzj = 0;
2339     for (j=0;j<anzi;j++) {
2340       /* Get offset within block of P */
2341       pshift = *aj%ppdof;
2342       /* Get block row of P */
2343       prow   = *aj++/ppdof; /* integer division */
2344       pnzj = pi[prow+1] - pi[prow];
2345       pjj  = pj + pi[prow];
2346       paj  = pa + pi[prow];
2347       for (k=0;k<pnzj;k++) {
2348         poffset = pjj[k]*ppdof+pshift;
2349         if (!apjdense[poffset]) {
2350           apjdense[poffset] = -1;
2351           apj[apnzj++]      = poffset;
2352         }
2353         apa[poffset] += (*aa)*paj[k];
2354       }
2355       flops += 2*pnzj;
2356       aa++;
2357     }
2358 
2359     /* Sort the j index array for quick sparse axpy. */
2360     /* Note: a array does not need sorting as it is in dense storage locations. */
2361     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
2362 
2363     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2364     prow    = i/ppdof; /* integer division */
2365     pshift  = i%ppdof;
2366     poffset = pi[prow];
2367     pnzi = pi[prow+1] - poffset;
2368     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2369     pJ   = pj+poffset;
2370     pA   = pa+poffset;
2371     for (j=0;j<pnzi;j++) {
2372       crow   = (*pJ)*ppdof+pshift;
2373       cjj    = cj + ci[crow];
2374       caj    = ca + ci[crow];
2375       pJ++;
2376       /* Perform sparse axpy operation.  Note cjj includes apj. */
2377       for (k=0,nextap=0;nextap<apnzj;k++) {
2378         if (cjj[k]==apj[nextap]) {
2379           caj[k] += (*pA)*apa[apj[nextap++]];
2380         }
2381       }
2382       flops += 2*apnzj;
2383       pA++;
2384     }
2385 
2386     /* Zero the current row info for A*P */
2387     for (j=0;j<apnzj;j++) {
2388       apa[apj[j]]      = 0.;
2389       apjdense[apj[j]] = 0;
2390     }
2391   }
2392 
2393   /* Assemble the final matrix and clean up */
2394   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2395   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2396   ierr = PetscFree(apa);CHKERRQ(ierr);
2397   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
2403 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2404 {
2405   PetscErrorCode    ierr;
2406 
2407   PetscFunctionBegin;
2408   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2409   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
2410   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
2416 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2417 {
2418   PetscFunctionBegin;
2419   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 EXTERN_C_BEGIN
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2426 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2427 {
2428   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2429   Mat               a = b->AIJ,B;
2430   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2431   PetscErrorCode    ierr;
2432   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2433   PetscInt          *cols;
2434   PetscScalar       *vals;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
2438   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
2439   for (i=0; i<m; i++) {
2440     nmax = PetscMax(nmax,aij->ilen[i]);
2441     for (j=0; j<dof; j++) {
2442       ilen[dof*i+j] = aij->ilen[i];
2443     }
2444   }
2445   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2446   ierr = PetscFree(ilen);CHKERRQ(ierr);
2447   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
2448   ii   = 0;
2449   for (i=0; i<m; i++) {
2450     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2451     for (j=0; j<dof; j++) {
2452       for (k=0; k<ncols; k++) {
2453         icols[k] = dof*cols[k]+j;
2454       }
2455       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2456       ii++;
2457     }
2458     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2459   }
2460   ierr = PetscFree(icols);CHKERRQ(ierr);
2461   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2462   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2463 
2464   if (reuse == MAT_REUSE_MATRIX) {
2465     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2466   } else {
2467     *newmat = B;
2468   }
2469   PetscFunctionReturn(0);
2470 }
2471 EXTERN_C_END
2472 
2473 #include "src/mat/impls/aij/mpi/mpiaij.h"
2474 
2475 EXTERN_C_BEGIN
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2478 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2479 {
2480   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2481   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2482   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2483   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2484   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2485   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2486   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2487   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2488   PetscInt          rstart,cstart,*garray,ii,k;
2489   PetscErrorCode    ierr;
2490   PetscScalar       *vals,*ovals;
2491 
2492   PetscFunctionBegin;
2493   ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr);
2494   for (i=0; i<A->rmap.n/dof; i++) {
2495     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2496     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2497     for (j=0; j<dof; j++) {
2498       dnz[dof*i+j] = AIJ->ilen[i];
2499       onz[dof*i+j] = OAIJ->ilen[i];
2500     }
2501   }
2502   ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2503   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2504 
2505   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2506   rstart = dof*maij->A->rmap.rstart;
2507   cstart = dof*maij->A->cmap.rstart;
2508   garray = mpiaij->garray;
2509 
2510   ii = rstart;
2511   for (i=0; i<A->rmap.n/dof; i++) {
2512     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2513     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2514     for (j=0; j<dof; j++) {
2515       for (k=0; k<ncols; k++) {
2516         icols[k] = cstart + dof*cols[k]+j;
2517       }
2518       for (k=0; k<oncols; k++) {
2519         oicols[k] = dof*garray[ocols[k]]+j;
2520       }
2521       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2522       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2523       ii++;
2524     }
2525     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2526     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2527   }
2528   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2529 
2530   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2531   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2532 
2533   if (reuse == MAT_REUSE_MATRIX) {
2534     PetscInt refct = A->refct; /* save A->refct */
2535     A->refct = 1;
2536     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2537     A->refct = refct; /* restore A->refct */
2538   } else {
2539     *newmat = B;
2540   }
2541   PetscFunctionReturn(0);
2542 }
2543 EXTERN_C_END
2544 
2545 
2546 /* ---------------------------------------------------------------------------------- */
2547 /*MC
2548   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2549   operations for multicomponent problems.  It interpolates each component the same
2550   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2551   and MATMPIAIJ for distributed matrices.
2552 
2553   Operations provided:
2554 + MatMult
2555 . MatMultTranspose
2556 . MatMultAdd
2557 . MatMultTransposeAdd
2558 - MatView
2559 
2560   Level: advanced
2561 
2562 M*/
2563 #undef __FUNCT__
2564 #define __FUNCT__ "MatCreateMAIJ"
2565 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2566 {
2567   PetscErrorCode ierr;
2568   PetscMPIInt    size;
2569   PetscInt       n;
2570   Mat_MPIMAIJ    *b;
2571   Mat            B;
2572 
2573   PetscFunctionBegin;
2574   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2575 
2576   if (dof == 1) {
2577     *maij = A;
2578   } else {
2579     ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
2580     ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr);
2581     B->assembled    = PETSC_TRUE;
2582 
2583     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2584     if (size == 1) {
2585       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2586       B->ops->destroy = MatDestroy_SeqMAIJ;
2587       B->ops->view    = MatView_SeqMAIJ;
2588       b      = (Mat_MPIMAIJ*)B->data;
2589       b->dof = dof;
2590       b->AIJ = A;
2591       if (dof == 2) {
2592         B->ops->mult             = MatMult_SeqMAIJ_2;
2593         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2594         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2595         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2596       } else if (dof == 3) {
2597         B->ops->mult             = MatMult_SeqMAIJ_3;
2598         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2599         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2600         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2601       } else if (dof == 4) {
2602         B->ops->mult             = MatMult_SeqMAIJ_4;
2603         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2604         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2605         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2606       } else if (dof == 5) {
2607         B->ops->mult             = MatMult_SeqMAIJ_5;
2608         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2609         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2610         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2611       } else if (dof == 6) {
2612         B->ops->mult             = MatMult_SeqMAIJ_6;
2613         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2614         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2615         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2616       } else if (dof == 7) {
2617         B->ops->mult             = MatMult_SeqMAIJ_7;
2618         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2619         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2620         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2621       } else if (dof == 8) {
2622         B->ops->mult             = MatMult_SeqMAIJ_8;
2623         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2624         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2625         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2626       } else if (dof == 9) {
2627         B->ops->mult             = MatMult_SeqMAIJ_9;
2628         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2629         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2630         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2631       } else if (dof == 10) {
2632         B->ops->mult             = MatMult_SeqMAIJ_10;
2633         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2634         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2635         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2636       } else if (dof == 16) {
2637         B->ops->mult             = MatMult_SeqMAIJ_16;
2638         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2639         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2640         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2641       } else {
2642         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2643       }
2644       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2645       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2646       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2647     } else {
2648       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2649       IS         from,to;
2650       Vec        gvec;
2651       PetscInt   *garray,i;
2652 
2653       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2654       B->ops->destroy = MatDestroy_MPIMAIJ;
2655       B->ops->view    = MatView_MPIMAIJ;
2656       b      = (Mat_MPIMAIJ*)B->data;
2657       b->dof = dof;
2658       b->A   = A;
2659       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
2660       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
2661 
2662       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2663       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
2664       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2665 
2666       /* create two temporary Index sets for build scatter gather */
2667       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2668       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2669       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2670       ierr = PetscFree(garray);CHKERRQ(ierr);
2671       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2672 
2673       /* create temporary global vector to generate scatter context */
2674       ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr);
2675       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2676 
2677       /* generate the scatter context */
2678       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2679 
2680       ierr = ISDestroy(from);CHKERRQ(ierr);
2681       ierr = ISDestroy(to);CHKERRQ(ierr);
2682       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2683 
2684       B->ops->mult             = MatMult_MPIMAIJ_dof;
2685       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2686       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2687       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2688       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
2689       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
2690       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2691     }
2692     *maij = B;
2693     ierr = MatView_Private(B);CHKERRQ(ierr);
2694   }
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 
2699 
2700 
2701 
2702 
2703 
2704 
2705 
2706 
2707 
2708 
2709