xref: /petsc/src/mat/impls/maij/maij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 #include <petsc-private/vecimpl.h>
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatMAIJGetAIJ"
25 /*@C
26    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27 
28    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29 
30    Input Parameter:
31 .  A - the MAIJ matrix
32 
33    Output Parameter:
34 .  B - the AIJ matrix
35 
36    Level: advanced
37 
38    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39 
40 .seealso: MatCreateMAIJ()
41 @*/
42 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43 {
44   PetscErrorCode ierr;
45   PetscBool      ismpimaij,isseqmaij;
46 
47   PetscFunctionBegin;
48   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50   if (ismpimaij) {
51     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52 
53     *B = b->A;
54   } else if (isseqmaij) {
55     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56 
57     *B = b->AIJ;
58   } else {
59     *B = A;
60   }
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatMAIJRedimension"
66 /*@C
67    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68 
69    Logically Collective
70 
71    Input Parameter:
72 +  A - the MAIJ matrix
73 -  dof - the block size for the new matrix
74 
75    Output Parameter:
76 .  B - the new MAIJ matrix
77 
78    Level: advanced
79 
80 .seealso: MatCreateMAIJ()
81 @*/
82 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83 {
84   PetscErrorCode ierr;
85   Mat            Aij = PETSC_NULL;
86 
87   PetscFunctionBegin;
88   PetscValidLogicalCollectiveInt(A,dof,2);
89   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatDestroy_SeqMAIJ"
96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97 {
98   PetscErrorCode ierr;
99   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
100 
101   PetscFunctionBegin;
102   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103   ierr = PetscFree(A->data);CHKERRQ(ierr);
104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqmaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_seqaij_seqmaij_C","",PETSC_NULL);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "MatSetUp_MAIJ"
111 PetscErrorCode MatSetUp_MAIJ(Mat A)
112 {
113   PetscFunctionBegin;
114   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatView_SeqMAIJ"
120 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
121 {
122   PetscErrorCode ierr;
123   Mat            B;
124 
125   PetscFunctionBegin;
126   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
127   ierr = MatView(B,viewer);CHKERRQ(ierr);
128   ierr = MatDestroy(&B);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatView_MPIMAIJ"
134 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
135 {
136   PetscErrorCode ierr;
137   Mat            B;
138 
139   PetscFunctionBegin;
140   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
141   ierr = MatView(B,viewer);CHKERRQ(ierr);
142   ierr = MatDestroy(&B);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 #undef __FUNCT__
147 #define __FUNCT__ "MatDestroy_MPIMAIJ"
148 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
149 {
150   PetscErrorCode ierr;
151   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
152 
153   PetscFunctionBegin;
154   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
155   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
156   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
157   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
158   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
159   ierr = PetscFree(A->data);CHKERRQ(ierr);
160   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_mpimaij_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
161   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C","",PETSC_NULL);CHKERRQ(ierr);
162   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 /*MC
167   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168   multicomponent problems, interpolating or restricting each component the same way independently.
169   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
170 
171   Operations provided:
172 . MatMult
173 . MatMultTranspose
174 . MatMultAdd
175 . MatMultTransposeAdd
176 
177   Level: advanced
178 
179 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180 M*/
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatCreate_MAIJ"
184 PETSC_EXTERN_C PetscErrorCode MatCreate_MAIJ(Mat A)
185 {
186   PetscErrorCode ierr;
187   Mat_MPIMAIJ    *b;
188   PetscMPIInt    size;
189 
190   PetscFunctionBegin;
191   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
192   A->data  = (void*)b;
193   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
194   A->ops->setup = MatSetUp_MAIJ;
195 
196   b->AIJ  = 0;
197   b->dof  = 0;
198   b->OAIJ = 0;
199   b->ctx  = 0;
200   b->w    = 0;
201   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
202   if (size == 1) {
203     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
204   } else {
205     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
206   }
207   PetscFunctionReturn(0);
208 }
209 
210 /* --------------------------------------------------------------------------------------*/
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMult_SeqMAIJ_2"
213 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
214 {
215   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
216   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
217   const PetscScalar *x,*v;
218   PetscScalar       *y, sum1, sum2;
219   PetscErrorCode    ierr;
220   PetscInt          nonzerorow=0,n,i,jrow,j;
221   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
222 
223   PetscFunctionBegin;
224   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
225   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
226   idx  = a->j;
227   v    = a->a;
228   ii   = a->i;
229 
230   for (i=0; i<m; i++) {
231     jrow = ii[i];
232     n    = ii[i+1] - jrow;
233     sum1  = 0.0;
234     sum2  = 0.0;
235     nonzerorow += (n>0);
236     for (j=0; j<n; j++) {
237       sum1 += v[jrow]*x[2*idx[jrow]];
238       sum2 += v[jrow]*x[2*idx[jrow]+1];
239       jrow++;
240      }
241     y[2*i]   = sum1;
242     y[2*i+1] = sum2;
243   }
244 
245   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
246   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
247   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
253 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
254 {
255   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
256   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
257   const PetscScalar *x,*v;
258   PetscScalar       *y,alpha1,alpha2;
259   PetscErrorCode    ierr;
260   PetscInt          n,i;
261   const PetscInt    m = b->AIJ->rmap->n,*idx;
262 
263   PetscFunctionBegin;
264   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
265   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
266   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
267 
268   for (i=0; i<m; i++) {
269     idx    = a->j + a->i[i] ;
270     v      = a->a + a->i[i] ;
271     n      = a->i[i+1] - a->i[i];
272     alpha1 = x[2*i];
273     alpha2 = x[2*i+1];
274     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
275   }
276   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
277   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
278   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
284 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
285 {
286   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
287   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
288   const PetscScalar *x,*v;
289   PetscScalar       *y,sum1, sum2;
290   PetscErrorCode    ierr;
291   PetscInt          n,i,jrow,j;
292   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
293 
294   PetscFunctionBegin;
295   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
296   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
297   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
298   idx  = a->j;
299   v    = a->a;
300   ii   = a->i;
301 
302   for (i=0; i<m; i++) {
303     jrow = ii[i];
304     n    = ii[i+1] - jrow;
305     sum1  = 0.0;
306     sum2  = 0.0;
307     for (j=0; j<n; j++) {
308       sum1 += v[jrow]*x[2*idx[jrow]];
309       sum2 += v[jrow]*x[2*idx[jrow]+1];
310       jrow++;
311      }
312     y[2*i]   += sum1;
313     y[2*i+1] += sum2;
314   }
315 
316   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
317   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 #undef __FUNCT__
322 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
323 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
324 {
325   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
326   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
327   const PetscScalar *x,*v;
328   PetscScalar       *y,alpha1,alpha2;
329   PetscErrorCode    ierr;
330   PetscInt          n,i;
331   const PetscInt    m = b->AIJ->rmap->n,*idx;
332 
333   PetscFunctionBegin;
334   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
335   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
336   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
337 
338   for (i=0; i<m; i++) {
339     idx   = a->j + a->i[i] ;
340     v     = a->a + a->i[i] ;
341     n     = a->i[i+1] - a->i[i];
342     alpha1 = x[2*i];
343     alpha2 = x[2*i+1];
344     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
345   }
346   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
347   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
348   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 /* --------------------------------------------------------------------------------------*/
352 #undef __FUNCT__
353 #define __FUNCT__ "MatMult_SeqMAIJ_3"
354 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
355 {
356   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
357   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
358   const PetscScalar *x,*v;
359   PetscScalar       *y,sum1, sum2, sum3;
360   PetscErrorCode    ierr;
361   PetscInt          nonzerorow=0,n,i,jrow,j;
362   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
363 
364   PetscFunctionBegin;
365   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
366   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
367   idx  = a->j;
368   v    = a->a;
369   ii   = a->i;
370 
371   for (i=0; i<m; i++) {
372     jrow = ii[i];
373     n    = ii[i+1] - jrow;
374     sum1  = 0.0;
375     sum2  = 0.0;
376     sum3  = 0.0;
377     nonzerorow += (n>0);
378     for (j=0; j<n; j++) {
379       sum1 += v[jrow]*x[3*idx[jrow]];
380       sum2 += v[jrow]*x[3*idx[jrow]+1];
381       sum3 += v[jrow]*x[3*idx[jrow]+2];
382       jrow++;
383      }
384     y[3*i]   = sum1;
385     y[3*i+1] = sum2;
386     y[3*i+2] = sum3;
387   }
388 
389   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
390   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
391   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
392   PetscFunctionReturn(0);
393 }
394 
395 #undef __FUNCT__
396 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
397 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
398 {
399   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
400   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
401   const PetscScalar *x,*v;
402   PetscScalar       *y,alpha1,alpha2,alpha3;
403   PetscErrorCode    ierr;
404   PetscInt          n,i;
405   const PetscInt    m = b->AIJ->rmap->n,*idx;
406 
407   PetscFunctionBegin;
408   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
409   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
410   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
411 
412   for (i=0; i<m; i++) {
413     idx    = a->j + a->i[i];
414     v      = a->a + a->i[i];
415     n      = a->i[i+1] - a->i[i];
416     alpha1 = x[3*i];
417     alpha2 = x[3*i+1];
418     alpha3 = x[3*i+2];
419     while (n-->0) {
420       y[3*(*idx)]   += alpha1*(*v);
421       y[3*(*idx)+1] += alpha2*(*v);
422       y[3*(*idx)+2] += alpha3*(*v);
423       idx++; v++;
424     }
425   }
426   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
427   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
428   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
434 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
435 {
436   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
437   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
438   const PetscScalar *x,*v;
439   PetscScalar       *y,sum1, sum2, sum3;
440   PetscErrorCode    ierr;
441   PetscInt          n,i,jrow,j;
442   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
443 
444   PetscFunctionBegin;
445   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
446   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
447   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
448   idx  = a->j;
449   v    = a->a;
450   ii   = a->i;
451 
452   for (i=0; i<m; i++) {
453     jrow = ii[i];
454     n    = ii[i+1] - jrow;
455     sum1  = 0.0;
456     sum2  = 0.0;
457     sum3  = 0.0;
458     for (j=0; j<n; j++) {
459       sum1 += v[jrow]*x[3*idx[jrow]];
460       sum2 += v[jrow]*x[3*idx[jrow]+1];
461       sum3 += v[jrow]*x[3*idx[jrow]+2];
462       jrow++;
463      }
464     y[3*i]   += sum1;
465     y[3*i+1] += sum2;
466     y[3*i+2] += sum3;
467   }
468 
469   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
470   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
471   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 #undef __FUNCT__
475 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
476 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
477 {
478   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
479   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
480   const PetscScalar *x,*v;
481   PetscScalar       *y,alpha1,alpha2,alpha3;
482   PetscErrorCode    ierr;
483   PetscInt          n,i;
484   const PetscInt    m = b->AIJ->rmap->n,*idx;
485 
486   PetscFunctionBegin;
487   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
488   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
489   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
490   for (i=0; i<m; i++) {
491     idx    = a->j + a->i[i] ;
492     v      = a->a + a->i[i] ;
493     n      = a->i[i+1] - a->i[i];
494     alpha1 = x[3*i];
495     alpha2 = x[3*i+1];
496     alpha3 = x[3*i+2];
497     while (n-->0) {
498       y[3*(*idx)]   += alpha1*(*v);
499       y[3*(*idx)+1] += alpha2*(*v);
500       y[3*(*idx)+2] += alpha3*(*v);
501       idx++; v++;
502     }
503   }
504   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
505   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
506   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 /* ------------------------------------------------------------------------------*/
511 #undef __FUNCT__
512 #define __FUNCT__ "MatMult_SeqMAIJ_4"
513 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
514 {
515   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
516   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
517   const PetscScalar *x,*v;
518   PetscScalar       *y,sum1, sum2, sum3, sum4;
519   PetscErrorCode    ierr;
520   PetscInt          nonzerorow=0,n,i,jrow,j;
521   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
522 
523   PetscFunctionBegin;
524   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
525   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
526   idx  = a->j;
527   v    = a->a;
528   ii   = a->i;
529 
530   for (i=0; i<m; i++) {
531     jrow = ii[i];
532     n    = ii[i+1] - jrow;
533     sum1  = 0.0;
534     sum2  = 0.0;
535     sum3  = 0.0;
536     sum4  = 0.0;
537     nonzerorow += (n>0);
538     for (j=0; j<n; j++) {
539       sum1 += v[jrow]*x[4*idx[jrow]];
540       sum2 += v[jrow]*x[4*idx[jrow]+1];
541       sum3 += v[jrow]*x[4*idx[jrow]+2];
542       sum4 += v[jrow]*x[4*idx[jrow]+3];
543       jrow++;
544      }
545     y[4*i]   = sum1;
546     y[4*i+1] = sum2;
547     y[4*i+2] = sum3;
548     y[4*i+3] = sum4;
549   }
550 
551   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
552   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
553   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
557 #undef __FUNCT__
558 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
559 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
560 {
561   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
562   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
563   const PetscScalar *x,*v;
564   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
565   PetscErrorCode    ierr;
566   PetscInt          n,i;
567   const PetscInt    m = b->AIJ->rmap->n,*idx;
568 
569   PetscFunctionBegin;
570   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
571   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
572   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
573   for (i=0; i<m; i++) {
574     idx    = a->j + a->i[i] ;
575     v      = a->a + a->i[i] ;
576     n      = a->i[i+1] - a->i[i];
577     alpha1 = x[4*i];
578     alpha2 = x[4*i+1];
579     alpha3 = x[4*i+2];
580     alpha4 = x[4*i+3];
581     while (n-->0) {
582       y[4*(*idx)]   += alpha1*(*v);
583       y[4*(*idx)+1] += alpha2*(*v);
584       y[4*(*idx)+2] += alpha3*(*v);
585       y[4*(*idx)+3] += alpha4*(*v);
586       idx++; v++;
587     }
588   }
589   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
590   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
591   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
597 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
598 {
599   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
600   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
601   const PetscScalar *x,*v;
602   PetscScalar       *y,sum1, sum2, sum3, sum4;
603   PetscErrorCode    ierr;
604   PetscInt          n,i,jrow,j;
605   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
606 
607   PetscFunctionBegin;
608   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
609   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
610   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
611   idx  = a->j;
612   v    = a->a;
613   ii   = a->i;
614 
615   for (i=0; i<m; i++) {
616     jrow = ii[i];
617     n    = ii[i+1] - jrow;
618     sum1  = 0.0;
619     sum2  = 0.0;
620     sum3  = 0.0;
621     sum4  = 0.0;
622     for (j=0; j<n; j++) {
623       sum1 += v[jrow]*x[4*idx[jrow]];
624       sum2 += v[jrow]*x[4*idx[jrow]+1];
625       sum3 += v[jrow]*x[4*idx[jrow]+2];
626       sum4 += v[jrow]*x[4*idx[jrow]+3];
627       jrow++;
628      }
629     y[4*i]   += sum1;
630     y[4*i+1] += sum2;
631     y[4*i+2] += sum3;
632     y[4*i+3] += sum4;
633   }
634 
635   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
636   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
637   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
638   PetscFunctionReturn(0);
639 }
640 #undef __FUNCT__
641 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
642 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
643 {
644   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
645   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
646   const PetscScalar *x,*v;
647   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
648   PetscErrorCode    ierr;
649   PetscInt          n,i;
650   const PetscInt    m = b->AIJ->rmap->n,*idx;
651 
652   PetscFunctionBegin;
653   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
654   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
655   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
656 
657   for (i=0; i<m; i++) {
658     idx    = a->j + a->i[i] ;
659     v      = a->a + a->i[i] ;
660     n      = a->i[i+1] - a->i[i];
661     alpha1 = x[4*i];
662     alpha2 = x[4*i+1];
663     alpha3 = x[4*i+2];
664     alpha4 = x[4*i+3];
665     while (n-->0) {
666       y[4*(*idx)]   += alpha1*(*v);
667       y[4*(*idx)+1] += alpha2*(*v);
668       y[4*(*idx)+2] += alpha3*(*v);
669       y[4*(*idx)+3] += alpha4*(*v);
670       idx++; v++;
671     }
672   }
673   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
674   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
675   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 /* ------------------------------------------------------------------------------*/
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatMult_SeqMAIJ_5"
682 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
683 {
684   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
685   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
686   const PetscScalar *x,*v;
687   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
688   PetscErrorCode    ierr;
689   PetscInt          nonzerorow=0,n,i,jrow,j;
690   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
691 
692   PetscFunctionBegin;
693   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
694   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
695   idx  = a->j;
696   v    = a->a;
697   ii   = a->i;
698 
699   for (i=0; i<m; i++) {
700     jrow = ii[i];
701     n    = ii[i+1] - jrow;
702     sum1  = 0.0;
703     sum2  = 0.0;
704     sum3  = 0.0;
705     sum4  = 0.0;
706     sum5  = 0.0;
707     nonzerorow += (n>0);
708     for (j=0; j<n; j++) {
709       sum1 += v[jrow]*x[5*idx[jrow]];
710       sum2 += v[jrow]*x[5*idx[jrow]+1];
711       sum3 += v[jrow]*x[5*idx[jrow]+2];
712       sum4 += v[jrow]*x[5*idx[jrow]+3];
713       sum5 += v[jrow]*x[5*idx[jrow]+4];
714       jrow++;
715      }
716     y[5*i]   = sum1;
717     y[5*i+1] = sum2;
718     y[5*i+2] = sum3;
719     y[5*i+3] = sum4;
720     y[5*i+4] = sum5;
721   }
722 
723   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
724   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
725   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
731 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
732 {
733   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
734   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
735   const PetscScalar *x,*v;
736   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
737   PetscErrorCode    ierr;
738   PetscInt          n,i;
739   const PetscInt    m = b->AIJ->rmap->n,*idx;
740 
741   PetscFunctionBegin;
742   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
743   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
744   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
745 
746   for (i=0; i<m; i++) {
747     idx    = a->j + a->i[i] ;
748     v      = a->a + a->i[i] ;
749     n      = a->i[i+1] - a->i[i];
750     alpha1 = x[5*i];
751     alpha2 = x[5*i+1];
752     alpha3 = x[5*i+2];
753     alpha4 = x[5*i+3];
754     alpha5 = x[5*i+4];
755     while (n-->0) {
756       y[5*(*idx)]   += alpha1*(*v);
757       y[5*(*idx)+1] += alpha2*(*v);
758       y[5*(*idx)+2] += alpha3*(*v);
759       y[5*(*idx)+3] += alpha4*(*v);
760       y[5*(*idx)+4] += alpha5*(*v);
761       idx++; v++;
762     }
763   }
764   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
765   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
766   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769 
770 #undef __FUNCT__
771 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
772 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
773 {
774   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
775   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
776   const PetscScalar *x,*v;
777   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
778   PetscErrorCode    ierr;
779   PetscInt          n,i,jrow,j;
780   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
781 
782   PetscFunctionBegin;
783   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
784   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
785   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
786   idx  = a->j;
787   v    = a->a;
788   ii   = a->i;
789 
790   for (i=0; i<m; i++) {
791     jrow = ii[i];
792     n    = ii[i+1] - jrow;
793     sum1  = 0.0;
794     sum2  = 0.0;
795     sum3  = 0.0;
796     sum4  = 0.0;
797     sum5  = 0.0;
798     for (j=0; j<n; j++) {
799       sum1 += v[jrow]*x[5*idx[jrow]];
800       sum2 += v[jrow]*x[5*idx[jrow]+1];
801       sum3 += v[jrow]*x[5*idx[jrow]+2];
802       sum4 += v[jrow]*x[5*idx[jrow]+3];
803       sum5 += v[jrow]*x[5*idx[jrow]+4];
804       jrow++;
805      }
806     y[5*i]   += sum1;
807     y[5*i+1] += sum2;
808     y[5*i+2] += sum3;
809     y[5*i+3] += sum4;
810     y[5*i+4] += sum5;
811   }
812 
813   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
814   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
815   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
821 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
822 {
823   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
824   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
825   const PetscScalar *x,*v;
826   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
827   PetscErrorCode    ierr;
828   PetscInt          n,i;
829   const PetscInt    m = b->AIJ->rmap->n,*idx;
830 
831   PetscFunctionBegin;
832   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
833   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
835 
836   for (i=0; i<m; i++) {
837     idx    = a->j + a->i[i] ;
838     v      = a->a + a->i[i] ;
839     n      = a->i[i+1] - a->i[i];
840     alpha1 = x[5*i];
841     alpha2 = x[5*i+1];
842     alpha3 = x[5*i+2];
843     alpha4 = x[5*i+3];
844     alpha5 = x[5*i+4];
845     while (n-->0) {
846       y[5*(*idx)]   += alpha1*(*v);
847       y[5*(*idx)+1] += alpha2*(*v);
848       y[5*(*idx)+2] += alpha3*(*v);
849       y[5*(*idx)+3] += alpha4*(*v);
850       y[5*(*idx)+4] += alpha5*(*v);
851       idx++; v++;
852     }
853   }
854   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
855   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
856   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 /* ------------------------------------------------------------------------------*/
861 #undef __FUNCT__
862 #define __FUNCT__ "MatMult_SeqMAIJ_6"
863 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
864 {
865   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
866   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
867   const PetscScalar *x,*v;
868   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
869   PetscErrorCode    ierr;
870   PetscInt          nonzerorow=0,n,i,jrow,j;
871   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
872 
873   PetscFunctionBegin;
874   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
875   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
876   idx  = a->j;
877   v    = a->a;
878   ii   = a->i;
879 
880   for (i=0; i<m; i++) {
881     jrow = ii[i];
882     n    = ii[i+1] - jrow;
883     sum1  = 0.0;
884     sum2  = 0.0;
885     sum3  = 0.0;
886     sum4  = 0.0;
887     sum5  = 0.0;
888     sum6  = 0.0;
889     nonzerorow += (n>0);
890     for (j=0; j<n; j++) {
891       sum1 += v[jrow]*x[6*idx[jrow]];
892       sum2 += v[jrow]*x[6*idx[jrow]+1];
893       sum3 += v[jrow]*x[6*idx[jrow]+2];
894       sum4 += v[jrow]*x[6*idx[jrow]+3];
895       sum5 += v[jrow]*x[6*idx[jrow]+4];
896       sum6 += v[jrow]*x[6*idx[jrow]+5];
897       jrow++;
898      }
899     y[6*i]   = sum1;
900     y[6*i+1] = sum2;
901     y[6*i+2] = sum3;
902     y[6*i+3] = sum4;
903     y[6*i+4] = sum5;
904     y[6*i+5] = sum6;
905   }
906 
907   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
908   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
909   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
915 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
916 {
917   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
918   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
919   const PetscScalar *x,*v;
920   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
921   PetscErrorCode    ierr;
922   PetscInt          n,i;
923   const PetscInt    m = b->AIJ->rmap->n,*idx;
924 
925   PetscFunctionBegin;
926   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
927   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
928   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
929 
930   for (i=0; i<m; i++) {
931     idx    = a->j + a->i[i] ;
932     v      = a->a + a->i[i] ;
933     n      = a->i[i+1] - a->i[i];
934     alpha1 = x[6*i];
935     alpha2 = x[6*i+1];
936     alpha3 = x[6*i+2];
937     alpha4 = x[6*i+3];
938     alpha5 = x[6*i+4];
939     alpha6 = x[6*i+5];
940     while (n-->0) {
941       y[6*(*idx)]   += alpha1*(*v);
942       y[6*(*idx)+1] += alpha2*(*v);
943       y[6*(*idx)+2] += alpha3*(*v);
944       y[6*(*idx)+3] += alpha4*(*v);
945       y[6*(*idx)+4] += alpha5*(*v);
946       y[6*(*idx)+5] += alpha6*(*v);
947       idx++; v++;
948     }
949   }
950   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
951   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
952   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 #undef __FUNCT__
957 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
958 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
959 {
960   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
961   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
962   const PetscScalar *x,*v;
963   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
964   PetscErrorCode    ierr;
965   PetscInt          n,i,jrow,j;
966   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
967 
968   PetscFunctionBegin;
969   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
970   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
971   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
972   idx  = a->j;
973   v    = a->a;
974   ii   = a->i;
975 
976   for (i=0; i<m; i++) {
977     jrow = ii[i];
978     n    = ii[i+1] - jrow;
979     sum1  = 0.0;
980     sum2  = 0.0;
981     sum3  = 0.0;
982     sum4  = 0.0;
983     sum5  = 0.0;
984     sum6  = 0.0;
985     for (j=0; j<n; j++) {
986       sum1 += v[jrow]*x[6*idx[jrow]];
987       sum2 += v[jrow]*x[6*idx[jrow]+1];
988       sum3 += v[jrow]*x[6*idx[jrow]+2];
989       sum4 += v[jrow]*x[6*idx[jrow]+3];
990       sum5 += v[jrow]*x[6*idx[jrow]+4];
991       sum6 += v[jrow]*x[6*idx[jrow]+5];
992       jrow++;
993      }
994     y[6*i]   += sum1;
995     y[6*i+1] += sum2;
996     y[6*i+2] += sum3;
997     y[6*i+3] += sum4;
998     y[6*i+4] += sum5;
999     y[6*i+5] += sum6;
1000   }
1001 
1002   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1003   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1004   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1010 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1011 {
1012   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1013   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1014   const PetscScalar *x,*v;
1015   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1016   PetscErrorCode    ierr;
1017   PetscInt          n,i;
1018   const PetscInt    m = b->AIJ->rmap->n,*idx;
1019 
1020   PetscFunctionBegin;
1021   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1022   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1023   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1024 
1025   for (i=0; i<m; i++) {
1026     idx    = a->j + a->i[i] ;
1027     v      = a->a + a->i[i] ;
1028     n      = a->i[i+1] - a->i[i];
1029     alpha1 = x[6*i];
1030     alpha2 = x[6*i+1];
1031     alpha3 = x[6*i+2];
1032     alpha4 = x[6*i+3];
1033     alpha5 = x[6*i+4];
1034     alpha6 = x[6*i+5];
1035     while (n-->0) {
1036       y[6*(*idx)]   += alpha1*(*v);
1037       y[6*(*idx)+1] += alpha2*(*v);
1038       y[6*(*idx)+2] += alpha3*(*v);
1039       y[6*(*idx)+3] += alpha4*(*v);
1040       y[6*(*idx)+4] += alpha5*(*v);
1041       y[6*(*idx)+5] += alpha6*(*v);
1042       idx++; v++;
1043     }
1044   }
1045   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1046   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1047   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 /* ------------------------------------------------------------------------------*/
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1054 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1055 {
1056   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1057   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1058   const PetscScalar *x,*v;
1059   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1060   PetscErrorCode    ierr;
1061   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1062   PetscInt          nonzerorow=0,n,i,jrow,j;
1063 
1064   PetscFunctionBegin;
1065   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1066   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1067   idx  = a->j;
1068   v    = a->a;
1069   ii   = a->i;
1070 
1071   for (i=0; i<m; i++) {
1072     jrow = ii[i];
1073     n    = ii[i+1] - jrow;
1074     sum1  = 0.0;
1075     sum2  = 0.0;
1076     sum3  = 0.0;
1077     sum4  = 0.0;
1078     sum5  = 0.0;
1079     sum6  = 0.0;
1080     sum7  = 0.0;
1081     nonzerorow += (n>0);
1082     for (j=0; j<n; j++) {
1083       sum1 += v[jrow]*x[7*idx[jrow]];
1084       sum2 += v[jrow]*x[7*idx[jrow]+1];
1085       sum3 += v[jrow]*x[7*idx[jrow]+2];
1086       sum4 += v[jrow]*x[7*idx[jrow]+3];
1087       sum5 += v[jrow]*x[7*idx[jrow]+4];
1088       sum6 += v[jrow]*x[7*idx[jrow]+5];
1089       sum7 += v[jrow]*x[7*idx[jrow]+6];
1090       jrow++;
1091      }
1092     y[7*i]   = sum1;
1093     y[7*i+1] = sum2;
1094     y[7*i+2] = sum3;
1095     y[7*i+3] = sum4;
1096     y[7*i+4] = sum5;
1097     y[7*i+5] = sum6;
1098     y[7*i+6] = sum7;
1099   }
1100 
1101   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1102   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1103   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1109 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1110 {
1111   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1112   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1113   const PetscScalar *x,*v;
1114   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1115   PetscErrorCode    ierr;
1116   const PetscInt    m = b->AIJ->rmap->n,*idx;
1117   PetscInt          n,i;
1118 
1119   PetscFunctionBegin;
1120   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1121   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1122   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1123 
1124   for (i=0; i<m; i++) {
1125     idx    = a->j + a->i[i] ;
1126     v      = a->a + a->i[i] ;
1127     n      = a->i[i+1] - a->i[i];
1128     alpha1 = x[7*i];
1129     alpha2 = x[7*i+1];
1130     alpha3 = x[7*i+2];
1131     alpha4 = x[7*i+3];
1132     alpha5 = x[7*i+4];
1133     alpha6 = x[7*i+5];
1134     alpha7 = x[7*i+6];
1135     while (n-->0) {
1136       y[7*(*idx)]   += alpha1*(*v);
1137       y[7*(*idx)+1] += alpha2*(*v);
1138       y[7*(*idx)+2] += alpha3*(*v);
1139       y[7*(*idx)+3] += alpha4*(*v);
1140       y[7*(*idx)+4] += alpha5*(*v);
1141       y[7*(*idx)+5] += alpha6*(*v);
1142       y[7*(*idx)+6] += alpha7*(*v);
1143       idx++; v++;
1144     }
1145   }
1146   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1147   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1148   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1154 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1155 {
1156   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1157   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1158   const PetscScalar *x,*v;
1159   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1160   PetscErrorCode    ierr;
1161   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1162   PetscInt          n,i,jrow,j;
1163 
1164   PetscFunctionBegin;
1165   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1166   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1167   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1168   idx  = a->j;
1169   v    = a->a;
1170   ii   = a->i;
1171 
1172   for (i=0; i<m; i++) {
1173     jrow = ii[i];
1174     n    = ii[i+1] - jrow;
1175     sum1  = 0.0;
1176     sum2  = 0.0;
1177     sum3  = 0.0;
1178     sum4  = 0.0;
1179     sum5  = 0.0;
1180     sum6  = 0.0;
1181     sum7  = 0.0;
1182     for (j=0; j<n; j++) {
1183       sum1 += v[jrow]*x[7*idx[jrow]];
1184       sum2 += v[jrow]*x[7*idx[jrow]+1];
1185       sum3 += v[jrow]*x[7*idx[jrow]+2];
1186       sum4 += v[jrow]*x[7*idx[jrow]+3];
1187       sum5 += v[jrow]*x[7*idx[jrow]+4];
1188       sum6 += v[jrow]*x[7*idx[jrow]+5];
1189       sum7 += v[jrow]*x[7*idx[jrow]+6];
1190       jrow++;
1191      }
1192     y[7*i]   += sum1;
1193     y[7*i+1] += sum2;
1194     y[7*i+2] += sum3;
1195     y[7*i+3] += sum4;
1196     y[7*i+4] += sum5;
1197     y[7*i+5] += sum6;
1198     y[7*i+6] += sum7;
1199   }
1200 
1201   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1202   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1203   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1209 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1210 {
1211   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1212   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1213   const PetscScalar *x,*v;
1214   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1215   PetscErrorCode    ierr;
1216   const PetscInt    m = b->AIJ->rmap->n,*idx;
1217   PetscInt          n,i;
1218 
1219   PetscFunctionBegin;
1220   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1221   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1222   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1223   for (i=0; i<m; i++) {
1224     idx    = a->j + a->i[i] ;
1225     v      = a->a + a->i[i] ;
1226     n      = a->i[i+1] - a->i[i];
1227     alpha1 = x[7*i];
1228     alpha2 = x[7*i+1];
1229     alpha3 = x[7*i+2];
1230     alpha4 = x[7*i+3];
1231     alpha5 = x[7*i+4];
1232     alpha6 = x[7*i+5];
1233     alpha7 = x[7*i+6];
1234     while (n-->0) {
1235       y[7*(*idx)]   += alpha1*(*v);
1236       y[7*(*idx)+1] += alpha2*(*v);
1237       y[7*(*idx)+2] += alpha3*(*v);
1238       y[7*(*idx)+3] += alpha4*(*v);
1239       y[7*(*idx)+4] += alpha5*(*v);
1240       y[7*(*idx)+5] += alpha6*(*v);
1241       y[7*(*idx)+6] += alpha7*(*v);
1242       idx++; v++;
1243     }
1244   }
1245   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1246   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1247   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1253 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1254 {
1255   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1256   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1257   const PetscScalar *x,*v;
1258   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1259   PetscErrorCode    ierr;
1260   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1261   PetscInt          nonzerorow=0,n,i,jrow,j;
1262 
1263   PetscFunctionBegin;
1264   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1265   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1266   idx  = a->j;
1267   v    = a->a;
1268   ii   = a->i;
1269 
1270   for (i=0; i<m; i++) {
1271     jrow = ii[i];
1272     n    = ii[i+1] - jrow;
1273     sum1  = 0.0;
1274     sum2  = 0.0;
1275     sum3  = 0.0;
1276     sum4  = 0.0;
1277     sum5  = 0.0;
1278     sum6  = 0.0;
1279     sum7  = 0.0;
1280     sum8  = 0.0;
1281     nonzerorow += (n>0);
1282     for (j=0; j<n; j++) {
1283       sum1 += v[jrow]*x[8*idx[jrow]];
1284       sum2 += v[jrow]*x[8*idx[jrow]+1];
1285       sum3 += v[jrow]*x[8*idx[jrow]+2];
1286       sum4 += v[jrow]*x[8*idx[jrow]+3];
1287       sum5 += v[jrow]*x[8*idx[jrow]+4];
1288       sum6 += v[jrow]*x[8*idx[jrow]+5];
1289       sum7 += v[jrow]*x[8*idx[jrow]+6];
1290       sum8 += v[jrow]*x[8*idx[jrow]+7];
1291       jrow++;
1292      }
1293     y[8*i]   = sum1;
1294     y[8*i+1] = sum2;
1295     y[8*i+2] = sum3;
1296     y[8*i+3] = sum4;
1297     y[8*i+4] = sum5;
1298     y[8*i+5] = sum6;
1299     y[8*i+6] = sum7;
1300     y[8*i+7] = sum8;
1301   }
1302 
1303   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1304   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1305   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1311 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1312 {
1313   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1314   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1315   const PetscScalar *x,*v;
1316   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1317   PetscErrorCode    ierr;
1318   const PetscInt    m = b->AIJ->rmap->n,*idx;
1319   PetscInt          n,i;
1320 
1321   PetscFunctionBegin;
1322   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1323   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1324   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1325 
1326   for (i=0; i<m; i++) {
1327     idx    = a->j + a->i[i] ;
1328     v      = a->a + a->i[i] ;
1329     n      = a->i[i+1] - a->i[i];
1330     alpha1 = x[8*i];
1331     alpha2 = x[8*i+1];
1332     alpha3 = x[8*i+2];
1333     alpha4 = x[8*i+3];
1334     alpha5 = x[8*i+4];
1335     alpha6 = x[8*i+5];
1336     alpha7 = x[8*i+6];
1337     alpha8 = x[8*i+7];
1338     while (n-->0) {
1339       y[8*(*idx)]   += alpha1*(*v);
1340       y[8*(*idx)+1] += alpha2*(*v);
1341       y[8*(*idx)+2] += alpha3*(*v);
1342       y[8*(*idx)+3] += alpha4*(*v);
1343       y[8*(*idx)+4] += alpha5*(*v);
1344       y[8*(*idx)+5] += alpha6*(*v);
1345       y[8*(*idx)+6] += alpha7*(*v);
1346       y[8*(*idx)+7] += alpha8*(*v);
1347       idx++; v++;
1348     }
1349   }
1350   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1351   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1352   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1358 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1359 {
1360   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1361   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1362   const PetscScalar *x,*v;
1363   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1364   PetscErrorCode    ierr;
1365   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1366   PetscInt          n,i,jrow,j;
1367 
1368   PetscFunctionBegin;
1369   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1370   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1371   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1372   idx  = a->j;
1373   v    = a->a;
1374   ii   = a->i;
1375 
1376   for (i=0; i<m; i++) {
1377     jrow = ii[i];
1378     n    = ii[i+1] - jrow;
1379     sum1  = 0.0;
1380     sum2  = 0.0;
1381     sum3  = 0.0;
1382     sum4  = 0.0;
1383     sum5  = 0.0;
1384     sum6  = 0.0;
1385     sum7  = 0.0;
1386     sum8  = 0.0;
1387     for (j=0; j<n; j++) {
1388       sum1 += v[jrow]*x[8*idx[jrow]];
1389       sum2 += v[jrow]*x[8*idx[jrow]+1];
1390       sum3 += v[jrow]*x[8*idx[jrow]+2];
1391       sum4 += v[jrow]*x[8*idx[jrow]+3];
1392       sum5 += v[jrow]*x[8*idx[jrow]+4];
1393       sum6 += v[jrow]*x[8*idx[jrow]+5];
1394       sum7 += v[jrow]*x[8*idx[jrow]+6];
1395       sum8 += v[jrow]*x[8*idx[jrow]+7];
1396       jrow++;
1397      }
1398     y[8*i]   += sum1;
1399     y[8*i+1] += sum2;
1400     y[8*i+2] += sum3;
1401     y[8*i+3] += sum4;
1402     y[8*i+4] += sum5;
1403     y[8*i+5] += sum6;
1404     y[8*i+6] += sum7;
1405     y[8*i+7] += sum8;
1406   }
1407 
1408   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1409   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1410   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1416 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1417 {
1418   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1419   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1420   const PetscScalar *x,*v;
1421   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1422   PetscErrorCode    ierr;
1423   const PetscInt    m = b->AIJ->rmap->n,*idx;
1424   PetscInt          n,i;
1425 
1426   PetscFunctionBegin;
1427   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1428   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1429   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1430   for (i=0; i<m; i++) {
1431     idx    = a->j + a->i[i] ;
1432     v      = a->a + a->i[i] ;
1433     n      = a->i[i+1] - a->i[i];
1434     alpha1 = x[8*i];
1435     alpha2 = x[8*i+1];
1436     alpha3 = x[8*i+2];
1437     alpha4 = x[8*i+3];
1438     alpha5 = x[8*i+4];
1439     alpha6 = x[8*i+5];
1440     alpha7 = x[8*i+6];
1441     alpha8 = x[8*i+7];
1442     while (n-->0) {
1443       y[8*(*idx)]   += alpha1*(*v);
1444       y[8*(*idx)+1] += alpha2*(*v);
1445       y[8*(*idx)+2] += alpha3*(*v);
1446       y[8*(*idx)+3] += alpha4*(*v);
1447       y[8*(*idx)+4] += alpha5*(*v);
1448       y[8*(*idx)+5] += alpha6*(*v);
1449       y[8*(*idx)+6] += alpha7*(*v);
1450       y[8*(*idx)+7] += alpha8*(*v);
1451       idx++; v++;
1452     }
1453   }
1454   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1455   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1456   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 /* ------------------------------------------------------------------------------*/
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1463 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1464 {
1465   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1466   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1467   const PetscScalar *x,*v;
1468   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1469   PetscErrorCode    ierr;
1470   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1471   PetscInt          nonzerorow=0,n,i,jrow,j;
1472 
1473   PetscFunctionBegin;
1474   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1475   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1476   idx  = a->j;
1477   v    = a->a;
1478   ii   = a->i;
1479 
1480   for (i=0; i<m; i++) {
1481     jrow = ii[i];
1482     n    = ii[i+1] - jrow;
1483     sum1  = 0.0;
1484     sum2  = 0.0;
1485     sum3  = 0.0;
1486     sum4  = 0.0;
1487     sum5  = 0.0;
1488     sum6  = 0.0;
1489     sum7  = 0.0;
1490     sum8  = 0.0;
1491     sum9  = 0.0;
1492     nonzerorow += (n>0);
1493     for (j=0; j<n; j++) {
1494       sum1 += v[jrow]*x[9*idx[jrow]];
1495       sum2 += v[jrow]*x[9*idx[jrow]+1];
1496       sum3 += v[jrow]*x[9*idx[jrow]+2];
1497       sum4 += v[jrow]*x[9*idx[jrow]+3];
1498       sum5 += v[jrow]*x[9*idx[jrow]+4];
1499       sum6 += v[jrow]*x[9*idx[jrow]+5];
1500       sum7 += v[jrow]*x[9*idx[jrow]+6];
1501       sum8 += v[jrow]*x[9*idx[jrow]+7];
1502       sum9 += v[jrow]*x[9*idx[jrow]+8];
1503       jrow++;
1504      }
1505     y[9*i]   = sum1;
1506     y[9*i+1] = sum2;
1507     y[9*i+2] = sum3;
1508     y[9*i+3] = sum4;
1509     y[9*i+4] = sum5;
1510     y[9*i+5] = sum6;
1511     y[9*i+6] = sum7;
1512     y[9*i+7] = sum8;
1513     y[9*i+8] = sum9;
1514   }
1515 
1516   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1517   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1518   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 /* ------------------------------------------------------------------------------*/
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1526 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1527 {
1528   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1529   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1530   const PetscScalar *x,*v;
1531   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1532   PetscErrorCode    ierr;
1533   const PetscInt    m = b->AIJ->rmap->n,*idx;
1534   PetscInt          n,i;
1535 
1536   PetscFunctionBegin;
1537   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1538   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1539   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1540 
1541   for (i=0; i<m; i++) {
1542     idx    = a->j + a->i[i] ;
1543     v      = a->a + a->i[i] ;
1544     n      = a->i[i+1] - a->i[i];
1545     alpha1 = x[9*i];
1546     alpha2 = x[9*i+1];
1547     alpha3 = x[9*i+2];
1548     alpha4 = x[9*i+3];
1549     alpha5 = x[9*i+4];
1550     alpha6 = x[9*i+5];
1551     alpha7 = x[9*i+6];
1552     alpha8 = x[9*i+7];
1553     alpha9 = x[9*i+8];
1554     while (n-->0) {
1555       y[9*(*idx)]   += alpha1*(*v);
1556       y[9*(*idx)+1] += alpha2*(*v);
1557       y[9*(*idx)+2] += alpha3*(*v);
1558       y[9*(*idx)+3] += alpha4*(*v);
1559       y[9*(*idx)+4] += alpha5*(*v);
1560       y[9*(*idx)+5] += alpha6*(*v);
1561       y[9*(*idx)+6] += alpha7*(*v);
1562       y[9*(*idx)+7] += alpha8*(*v);
1563       y[9*(*idx)+8] += alpha9*(*v);
1564       idx++; v++;
1565     }
1566   }
1567   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1568   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1569   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1575 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1576 {
1577   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1578   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1579   const PetscScalar *x,*v;
1580   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1581   PetscErrorCode    ierr;
1582   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1583   PetscInt          n,i,jrow,j;
1584 
1585   PetscFunctionBegin;
1586   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1587   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1588   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1589   idx  = a->j;
1590   v    = a->a;
1591   ii   = a->i;
1592 
1593   for (i=0; i<m; i++) {
1594     jrow = ii[i];
1595     n    = ii[i+1] - jrow;
1596     sum1  = 0.0;
1597     sum2  = 0.0;
1598     sum3  = 0.0;
1599     sum4  = 0.0;
1600     sum5  = 0.0;
1601     sum6  = 0.0;
1602     sum7  = 0.0;
1603     sum8  = 0.0;
1604     sum9  = 0.0;
1605     for (j=0; j<n; j++) {
1606       sum1 += v[jrow]*x[9*idx[jrow]];
1607       sum2 += v[jrow]*x[9*idx[jrow]+1];
1608       sum3 += v[jrow]*x[9*idx[jrow]+2];
1609       sum4 += v[jrow]*x[9*idx[jrow]+3];
1610       sum5 += v[jrow]*x[9*idx[jrow]+4];
1611       sum6 += v[jrow]*x[9*idx[jrow]+5];
1612       sum7 += v[jrow]*x[9*idx[jrow]+6];
1613       sum8 += v[jrow]*x[9*idx[jrow]+7];
1614       sum9 += v[jrow]*x[9*idx[jrow]+8];
1615       jrow++;
1616      }
1617     y[9*i]   += sum1;
1618     y[9*i+1] += sum2;
1619     y[9*i+2] += sum3;
1620     y[9*i+3] += sum4;
1621     y[9*i+4] += sum5;
1622     y[9*i+5] += sum6;
1623     y[9*i+6] += sum7;
1624     y[9*i+7] += sum8;
1625     y[9*i+8] += sum9;
1626   }
1627 
1628   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1629   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1630   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1636 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1637 {
1638   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1639   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1640   const PetscScalar *x,*v;
1641   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1642   PetscErrorCode    ierr;
1643   const PetscInt    m = b->AIJ->rmap->n,*idx;
1644   PetscInt          n,i;
1645 
1646   PetscFunctionBegin;
1647   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1648   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1649   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1650   for (i=0; i<m; i++) {
1651     idx    = a->j + a->i[i] ;
1652     v      = a->a + a->i[i] ;
1653     n      = a->i[i+1] - a->i[i];
1654     alpha1 = x[9*i];
1655     alpha2 = x[9*i+1];
1656     alpha3 = x[9*i+2];
1657     alpha4 = x[9*i+3];
1658     alpha5 = x[9*i+4];
1659     alpha6 = x[9*i+5];
1660     alpha7 = x[9*i+6];
1661     alpha8 = x[9*i+7];
1662     alpha9 = x[9*i+8];
1663     while (n-->0) {
1664       y[9*(*idx)]   += alpha1*(*v);
1665       y[9*(*idx)+1] += alpha2*(*v);
1666       y[9*(*idx)+2] += alpha3*(*v);
1667       y[9*(*idx)+3] += alpha4*(*v);
1668       y[9*(*idx)+4] += alpha5*(*v);
1669       y[9*(*idx)+5] += alpha6*(*v);
1670       y[9*(*idx)+6] += alpha7*(*v);
1671       y[9*(*idx)+7] += alpha8*(*v);
1672       y[9*(*idx)+8] += alpha9*(*v);
1673       idx++; v++;
1674     }
1675   }
1676   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1677   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1678   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1683 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1684 {
1685   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1686   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1687   const PetscScalar *x,*v;
1688   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689   PetscErrorCode    ierr;
1690   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1691   PetscInt          nonzerorow=0,n,i,jrow,j;
1692 
1693   PetscFunctionBegin;
1694   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1695   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1696   idx  = a->j;
1697   v    = a->a;
1698   ii   = a->i;
1699 
1700   for (i=0; i<m; i++) {
1701     jrow = ii[i];
1702     n    = ii[i+1] - jrow;
1703     sum1  = 0.0;
1704     sum2  = 0.0;
1705     sum3  = 0.0;
1706     sum4  = 0.0;
1707     sum5  = 0.0;
1708     sum6  = 0.0;
1709     sum7  = 0.0;
1710     sum8  = 0.0;
1711     sum9  = 0.0;
1712     sum10 = 0.0;
1713     nonzerorow += (n>0);
1714     for (j=0; j<n; j++) {
1715       sum1  += v[jrow]*x[10*idx[jrow]];
1716       sum2  += v[jrow]*x[10*idx[jrow]+1];
1717       sum3  += v[jrow]*x[10*idx[jrow]+2];
1718       sum4  += v[jrow]*x[10*idx[jrow]+3];
1719       sum5  += v[jrow]*x[10*idx[jrow]+4];
1720       sum6  += v[jrow]*x[10*idx[jrow]+5];
1721       sum7  += v[jrow]*x[10*idx[jrow]+6];
1722       sum8  += v[jrow]*x[10*idx[jrow]+7];
1723       sum9  += v[jrow]*x[10*idx[jrow]+8];
1724       sum10 += v[jrow]*x[10*idx[jrow]+9];
1725       jrow++;
1726      }
1727     y[10*i]   = sum1;
1728     y[10*i+1] = sum2;
1729     y[10*i+2] = sum3;
1730     y[10*i+3] = sum4;
1731     y[10*i+4] = sum5;
1732     y[10*i+5] = sum6;
1733     y[10*i+6] = sum7;
1734     y[10*i+7] = sum8;
1735     y[10*i+8] = sum9;
1736     y[10*i+9] = sum10;
1737   }
1738 
1739   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1740   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1741   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1747 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1748 {
1749   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1750   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1751   const PetscScalar *x,*v;
1752   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1753   PetscErrorCode    ierr;
1754   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1755   PetscInt          n,i,jrow,j;
1756 
1757   PetscFunctionBegin;
1758   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1759   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1760   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1761   idx  = a->j;
1762   v    = a->a;
1763   ii   = a->i;
1764 
1765   for (i=0; i<m; i++) {
1766     jrow = ii[i];
1767     n    = ii[i+1] - jrow;
1768     sum1  = 0.0;
1769     sum2  = 0.0;
1770     sum3  = 0.0;
1771     sum4  = 0.0;
1772     sum5  = 0.0;
1773     sum6  = 0.0;
1774     sum7  = 0.0;
1775     sum8  = 0.0;
1776     sum9  = 0.0;
1777     sum10 = 0.0;
1778     for (j=0; j<n; j++) {
1779       sum1  += v[jrow]*x[10*idx[jrow]];
1780       sum2  += v[jrow]*x[10*idx[jrow]+1];
1781       sum3  += v[jrow]*x[10*idx[jrow]+2];
1782       sum4  += v[jrow]*x[10*idx[jrow]+3];
1783       sum5  += v[jrow]*x[10*idx[jrow]+4];
1784       sum6  += v[jrow]*x[10*idx[jrow]+5];
1785       sum7  += v[jrow]*x[10*idx[jrow]+6];
1786       sum8  += v[jrow]*x[10*idx[jrow]+7];
1787       sum9  += v[jrow]*x[10*idx[jrow]+8];
1788       sum10 += v[jrow]*x[10*idx[jrow]+9];
1789       jrow++;
1790      }
1791     y[10*i]   += sum1;
1792     y[10*i+1] += sum2;
1793     y[10*i+2] += sum3;
1794     y[10*i+3] += sum4;
1795     y[10*i+4] += sum5;
1796     y[10*i+5] += sum6;
1797     y[10*i+6] += sum7;
1798     y[10*i+7] += sum8;
1799     y[10*i+8] += sum9;
1800     y[10*i+9] += sum10;
1801   }
1802 
1803   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1804   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1805   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1811 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1812 {
1813   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1814   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1815   const PetscScalar *x,*v;
1816   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1817   PetscErrorCode    ierr;
1818   const PetscInt    m = b->AIJ->rmap->n,*idx;
1819   PetscInt          n,i;
1820 
1821   PetscFunctionBegin;
1822   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1823   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1824   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1825 
1826   for (i=0; i<m; i++) {
1827     idx    = a->j + a->i[i] ;
1828     v      = a->a + a->i[i] ;
1829     n      = a->i[i+1] - a->i[i];
1830     alpha1 = x[10*i];
1831     alpha2 = x[10*i+1];
1832     alpha3 = x[10*i+2];
1833     alpha4 = x[10*i+3];
1834     alpha5 = x[10*i+4];
1835     alpha6 = x[10*i+5];
1836     alpha7 = x[10*i+6];
1837     alpha8 = x[10*i+7];
1838     alpha9 = x[10*i+8];
1839     alpha10 = x[10*i+9];
1840     while (n-->0) {
1841       y[10*(*idx)]   += alpha1*(*v);
1842       y[10*(*idx)+1] += alpha2*(*v);
1843       y[10*(*idx)+2] += alpha3*(*v);
1844       y[10*(*idx)+3] += alpha4*(*v);
1845       y[10*(*idx)+4] += alpha5*(*v);
1846       y[10*(*idx)+5] += alpha6*(*v);
1847       y[10*(*idx)+6] += alpha7*(*v);
1848       y[10*(*idx)+7] += alpha8*(*v);
1849       y[10*(*idx)+8] += alpha9*(*v);
1850       y[10*(*idx)+9] += alpha10*(*v);
1851       idx++; v++;
1852     }
1853   }
1854   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1855   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1856   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1862 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1863 {
1864   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1865   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1866   const PetscScalar *x,*v;
1867   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1868   PetscErrorCode    ierr;
1869   const PetscInt    m = b->AIJ->rmap->n,*idx;
1870   PetscInt          n,i;
1871 
1872   PetscFunctionBegin;
1873   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1874   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1875   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1876   for (i=0; i<m; i++) {
1877     idx    = a->j + a->i[i] ;
1878     v      = a->a + a->i[i] ;
1879     n      = a->i[i+1] - a->i[i];
1880     alpha1 = x[10*i];
1881     alpha2 = x[10*i+1];
1882     alpha3 = x[10*i+2];
1883     alpha4 = x[10*i+3];
1884     alpha5 = x[10*i+4];
1885     alpha6 = x[10*i+5];
1886     alpha7 = x[10*i+6];
1887     alpha8 = x[10*i+7];
1888     alpha9 = x[10*i+8];
1889     alpha10 = x[10*i+9];
1890     while (n-->0) {
1891       y[10*(*idx)]   += alpha1*(*v);
1892       y[10*(*idx)+1] += alpha2*(*v);
1893       y[10*(*idx)+2] += alpha3*(*v);
1894       y[10*(*idx)+3] += alpha4*(*v);
1895       y[10*(*idx)+4] += alpha5*(*v);
1896       y[10*(*idx)+5] += alpha6*(*v);
1897       y[10*(*idx)+6] += alpha7*(*v);
1898       y[10*(*idx)+7] += alpha8*(*v);
1899       y[10*(*idx)+8] += alpha9*(*v);
1900       y[10*(*idx)+9] += alpha10*(*v);
1901       idx++; v++;
1902     }
1903   }
1904   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1905   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1906   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 
1911 /*--------------------------------------------------------------------------------------------*/
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatMult_SeqMAIJ_11"
1914 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1915 {
1916   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1917   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1918   const PetscScalar *x,*v;
1919   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1920   PetscErrorCode    ierr;
1921   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1922   PetscInt          nonzerorow=0,n,i,jrow,j;
1923 
1924   PetscFunctionBegin;
1925   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1926   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1927   idx  = a->j;
1928   v    = a->a;
1929   ii   = a->i;
1930 
1931   for (i=0; i<m; i++) {
1932     jrow = ii[i];
1933     n    = ii[i+1] - jrow;
1934     sum1  = 0.0;
1935     sum2  = 0.0;
1936     sum3  = 0.0;
1937     sum4  = 0.0;
1938     sum5  = 0.0;
1939     sum6  = 0.0;
1940     sum7  = 0.0;
1941     sum8  = 0.0;
1942     sum9  = 0.0;
1943     sum10 = 0.0;
1944     sum11 = 0.0;
1945     nonzerorow += (n>0);
1946     for (j=0; j<n; j++) {
1947       sum1  += v[jrow]*x[11*idx[jrow]];
1948       sum2  += v[jrow]*x[11*idx[jrow]+1];
1949       sum3  += v[jrow]*x[11*idx[jrow]+2];
1950       sum4  += v[jrow]*x[11*idx[jrow]+3];
1951       sum5  += v[jrow]*x[11*idx[jrow]+4];
1952       sum6  += v[jrow]*x[11*idx[jrow]+5];
1953       sum7  += v[jrow]*x[11*idx[jrow]+6];
1954       sum8  += v[jrow]*x[11*idx[jrow]+7];
1955       sum9  += v[jrow]*x[11*idx[jrow]+8];
1956       sum10 += v[jrow]*x[11*idx[jrow]+9];
1957       sum11 += v[jrow]*x[11*idx[jrow]+10];
1958       jrow++;
1959      }
1960     y[11*i]   = sum1;
1961     y[11*i+1] = sum2;
1962     y[11*i+2] = sum3;
1963     y[11*i+3] = sum4;
1964     y[11*i+4] = sum5;
1965     y[11*i+5] = sum6;
1966     y[11*i+6] = sum7;
1967     y[11*i+7] = sum8;
1968     y[11*i+8] = sum9;
1969     y[11*i+9] = sum10;
1970     y[11*i+10] = sum11;
1971   }
1972 
1973   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1974   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1975   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1981 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1982 {
1983   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1984   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1985   const PetscScalar *x,*v;
1986   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1987   PetscErrorCode    ierr;
1988   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1989   PetscInt          n,i,jrow,j;
1990 
1991   PetscFunctionBegin;
1992   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1993   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1994   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1995   idx  = a->j;
1996   v    = a->a;
1997   ii   = a->i;
1998 
1999   for (i=0; i<m; i++) {
2000     jrow = ii[i];
2001     n    = ii[i+1] - jrow;
2002     sum1  = 0.0;
2003     sum2  = 0.0;
2004     sum3  = 0.0;
2005     sum4  = 0.0;
2006     sum5  = 0.0;
2007     sum6  = 0.0;
2008     sum7  = 0.0;
2009     sum8  = 0.0;
2010     sum9  = 0.0;
2011     sum10 = 0.0;
2012     sum11 = 0.0;
2013     for (j=0; j<n; j++) {
2014       sum1  += v[jrow]*x[11*idx[jrow]];
2015       sum2  += v[jrow]*x[11*idx[jrow]+1];
2016       sum3  += v[jrow]*x[11*idx[jrow]+2];
2017       sum4  += v[jrow]*x[11*idx[jrow]+3];
2018       sum5  += v[jrow]*x[11*idx[jrow]+4];
2019       sum6  += v[jrow]*x[11*idx[jrow]+5];
2020       sum7  += v[jrow]*x[11*idx[jrow]+6];
2021       sum8  += v[jrow]*x[11*idx[jrow]+7];
2022       sum9  += v[jrow]*x[11*idx[jrow]+8];
2023       sum10 += v[jrow]*x[11*idx[jrow]+9];
2024       sum11 += v[jrow]*x[11*idx[jrow]+10];
2025       jrow++;
2026      }
2027     y[11*i]   += sum1;
2028     y[11*i+1] += sum2;
2029     y[11*i+2] += sum3;
2030     y[11*i+3] += sum4;
2031     y[11*i+4] += sum5;
2032     y[11*i+5] += sum6;
2033     y[11*i+6] += sum7;
2034     y[11*i+7] += sum8;
2035     y[11*i+8] += sum9;
2036     y[11*i+9] += sum10;
2037     y[11*i+10] += sum11;
2038   }
2039 
2040   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2041   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2042   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2048 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2049 {
2050   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2051   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2052   const PetscScalar *x,*v;
2053   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2054   PetscErrorCode    ierr;
2055   const PetscInt    m = b->AIJ->rmap->n,*idx;
2056   PetscInt          n,i;
2057 
2058   PetscFunctionBegin;
2059   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2060   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2061   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2062 
2063   for (i=0; i<m; i++) {
2064     idx    = a->j + a->i[i] ;
2065     v      = a->a + a->i[i] ;
2066     n      = a->i[i+1] - a->i[i];
2067     alpha1 = x[11*i];
2068     alpha2 = x[11*i+1];
2069     alpha3 = x[11*i+2];
2070     alpha4 = x[11*i+3];
2071     alpha5 = x[11*i+4];
2072     alpha6 = x[11*i+5];
2073     alpha7 = x[11*i+6];
2074     alpha8 = x[11*i+7];
2075     alpha9 = x[11*i+8];
2076     alpha10 = x[11*i+9];
2077     alpha11 = x[11*i+10];
2078     while (n-->0) {
2079       y[11*(*idx)]   += alpha1*(*v);
2080       y[11*(*idx)+1] += alpha2*(*v);
2081       y[11*(*idx)+2] += alpha3*(*v);
2082       y[11*(*idx)+3] += alpha4*(*v);
2083       y[11*(*idx)+4] += alpha5*(*v);
2084       y[11*(*idx)+5] += alpha6*(*v);
2085       y[11*(*idx)+6] += alpha7*(*v);
2086       y[11*(*idx)+7] += alpha8*(*v);
2087       y[11*(*idx)+8] += alpha9*(*v);
2088       y[11*(*idx)+9] += alpha10*(*v);
2089       y[11*(*idx)+10] += alpha11*(*v);
2090       idx++; v++;
2091     }
2092   }
2093   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2094   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2095   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2101 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2102 {
2103   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2104   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2105   const PetscScalar *x,*v;
2106   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2107   PetscErrorCode    ierr;
2108   const PetscInt    m = b->AIJ->rmap->n,*idx;
2109   PetscInt          n,i;
2110 
2111   PetscFunctionBegin;
2112   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2113   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2114   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2115   for (i=0; i<m; i++) {
2116     idx    = a->j + a->i[i] ;
2117     v      = a->a + a->i[i] ;
2118     n      = a->i[i+1] - a->i[i];
2119     alpha1 = x[11*i];
2120     alpha2 = x[11*i+1];
2121     alpha3 = x[11*i+2];
2122     alpha4 = x[11*i+3];
2123     alpha5 = x[11*i+4];
2124     alpha6 = x[11*i+5];
2125     alpha7 = x[11*i+6];
2126     alpha8 = x[11*i+7];
2127     alpha9 = x[11*i+8];
2128     alpha10 = x[11*i+9];
2129     alpha11 = x[11*i+10];
2130     while (n-->0) {
2131       y[11*(*idx)]   += alpha1*(*v);
2132       y[11*(*idx)+1] += alpha2*(*v);
2133       y[11*(*idx)+2] += alpha3*(*v);
2134       y[11*(*idx)+3] += alpha4*(*v);
2135       y[11*(*idx)+4] += alpha5*(*v);
2136       y[11*(*idx)+5] += alpha6*(*v);
2137       y[11*(*idx)+6] += alpha7*(*v);
2138       y[11*(*idx)+7] += alpha8*(*v);
2139       y[11*(*idx)+8] += alpha9*(*v);
2140       y[11*(*idx)+9] += alpha10*(*v);
2141       y[11*(*idx)+10] += alpha11*(*v);
2142       idx++; v++;
2143     }
2144   }
2145   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2146   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2147   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 
2152 /*--------------------------------------------------------------------------------------------*/
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatMult_SeqMAIJ_16"
2155 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2156 {
2157   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2158   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2159   const PetscScalar *x,*v;
2160   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2161   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2162   PetscErrorCode     ierr;
2163   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2164   PetscInt           nonzerorow=0,n,i,jrow,j;
2165 
2166   PetscFunctionBegin;
2167   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2168   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2169   idx  = a->j;
2170   v    = a->a;
2171   ii   = a->i;
2172 
2173   for (i=0; i<m; i++) {
2174     jrow = ii[i];
2175     n    = ii[i+1] - jrow;
2176     sum1  = 0.0;
2177     sum2  = 0.0;
2178     sum3  = 0.0;
2179     sum4  = 0.0;
2180     sum5  = 0.0;
2181     sum6  = 0.0;
2182     sum7  = 0.0;
2183     sum8  = 0.0;
2184     sum9  = 0.0;
2185     sum10 = 0.0;
2186     sum11 = 0.0;
2187     sum12 = 0.0;
2188     sum13 = 0.0;
2189     sum14 = 0.0;
2190     sum15 = 0.0;
2191     sum16 = 0.0;
2192     nonzerorow += (n>0);
2193     for (j=0; j<n; j++) {
2194       sum1  += v[jrow]*x[16*idx[jrow]];
2195       sum2  += v[jrow]*x[16*idx[jrow]+1];
2196       sum3  += v[jrow]*x[16*idx[jrow]+2];
2197       sum4  += v[jrow]*x[16*idx[jrow]+3];
2198       sum5  += v[jrow]*x[16*idx[jrow]+4];
2199       sum6  += v[jrow]*x[16*idx[jrow]+5];
2200       sum7  += v[jrow]*x[16*idx[jrow]+6];
2201       sum8  += v[jrow]*x[16*idx[jrow]+7];
2202       sum9  += v[jrow]*x[16*idx[jrow]+8];
2203       sum10 += v[jrow]*x[16*idx[jrow]+9];
2204       sum11 += v[jrow]*x[16*idx[jrow]+10];
2205       sum12 += v[jrow]*x[16*idx[jrow]+11];
2206       sum13 += v[jrow]*x[16*idx[jrow]+12];
2207       sum14 += v[jrow]*x[16*idx[jrow]+13];
2208       sum15 += v[jrow]*x[16*idx[jrow]+14];
2209       sum16 += v[jrow]*x[16*idx[jrow]+15];
2210       jrow++;
2211      }
2212     y[16*i]    = sum1;
2213     y[16*i+1]  = sum2;
2214     y[16*i+2]  = sum3;
2215     y[16*i+3]  = sum4;
2216     y[16*i+4]  = sum5;
2217     y[16*i+5]  = sum6;
2218     y[16*i+6]  = sum7;
2219     y[16*i+7]  = sum8;
2220     y[16*i+8]  = sum9;
2221     y[16*i+9]  = sum10;
2222     y[16*i+10] = sum11;
2223     y[16*i+11] = sum12;
2224     y[16*i+12] = sum13;
2225     y[16*i+13] = sum14;
2226     y[16*i+14] = sum15;
2227     y[16*i+15] = sum16;
2228   }
2229 
2230   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2231   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2232   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2238 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2239 {
2240   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2241   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2242   const PetscScalar *x,*v;
2243   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2244   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2245   PetscErrorCode    ierr;
2246   const PetscInt    m = b->AIJ->rmap->n,*idx;
2247   PetscInt          n,i;
2248 
2249   PetscFunctionBegin;
2250   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2251   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2252   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2253 
2254   for (i=0; i<m; i++) {
2255     idx    = a->j + a->i[i] ;
2256     v      = a->a + a->i[i] ;
2257     n      = a->i[i+1] - a->i[i];
2258     alpha1  = x[16*i];
2259     alpha2  = x[16*i+1];
2260     alpha3  = x[16*i+2];
2261     alpha4  = x[16*i+3];
2262     alpha5  = x[16*i+4];
2263     alpha6  = x[16*i+5];
2264     alpha7  = x[16*i+6];
2265     alpha8  = x[16*i+7];
2266     alpha9  = x[16*i+8];
2267     alpha10 = x[16*i+9];
2268     alpha11 = x[16*i+10];
2269     alpha12 = x[16*i+11];
2270     alpha13 = x[16*i+12];
2271     alpha14 = x[16*i+13];
2272     alpha15 = x[16*i+14];
2273     alpha16 = x[16*i+15];
2274     while (n-->0) {
2275       y[16*(*idx)]    += alpha1*(*v);
2276       y[16*(*idx)+1]  += alpha2*(*v);
2277       y[16*(*idx)+2]  += alpha3*(*v);
2278       y[16*(*idx)+3]  += alpha4*(*v);
2279       y[16*(*idx)+4]  += alpha5*(*v);
2280       y[16*(*idx)+5]  += alpha6*(*v);
2281       y[16*(*idx)+6]  += alpha7*(*v);
2282       y[16*(*idx)+7]  += alpha8*(*v);
2283       y[16*(*idx)+8]  += alpha9*(*v);
2284       y[16*(*idx)+9]  += alpha10*(*v);
2285       y[16*(*idx)+10] += alpha11*(*v);
2286       y[16*(*idx)+11] += alpha12*(*v);
2287       y[16*(*idx)+12] += alpha13*(*v);
2288       y[16*(*idx)+13] += alpha14*(*v);
2289       y[16*(*idx)+14] += alpha15*(*v);
2290       y[16*(*idx)+15] += alpha16*(*v);
2291       idx++; v++;
2292     }
2293   }
2294   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2295   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2296   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2302 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2303 {
2304   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2305   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2306   const PetscScalar *x,*v;
2307   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2308   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2309   PetscErrorCode    ierr;
2310   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2311   PetscInt          n,i,jrow,j;
2312 
2313   PetscFunctionBegin;
2314   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2315   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2316   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2317   idx  = a->j;
2318   v    = a->a;
2319   ii   = a->i;
2320 
2321   for (i=0; i<m; i++) {
2322     jrow = ii[i];
2323     n    = ii[i+1] - jrow;
2324     sum1  = 0.0;
2325     sum2  = 0.0;
2326     sum3  = 0.0;
2327     sum4  = 0.0;
2328     sum5  = 0.0;
2329     sum6  = 0.0;
2330     sum7  = 0.0;
2331     sum8  = 0.0;
2332     sum9  = 0.0;
2333     sum10 = 0.0;
2334     sum11 = 0.0;
2335     sum12 = 0.0;
2336     sum13 = 0.0;
2337     sum14 = 0.0;
2338     sum15 = 0.0;
2339     sum16 = 0.0;
2340     for (j=0; j<n; j++) {
2341       sum1  += v[jrow]*x[16*idx[jrow]];
2342       sum2  += v[jrow]*x[16*idx[jrow]+1];
2343       sum3  += v[jrow]*x[16*idx[jrow]+2];
2344       sum4  += v[jrow]*x[16*idx[jrow]+3];
2345       sum5  += v[jrow]*x[16*idx[jrow]+4];
2346       sum6  += v[jrow]*x[16*idx[jrow]+5];
2347       sum7  += v[jrow]*x[16*idx[jrow]+6];
2348       sum8  += v[jrow]*x[16*idx[jrow]+7];
2349       sum9  += v[jrow]*x[16*idx[jrow]+8];
2350       sum10 += v[jrow]*x[16*idx[jrow]+9];
2351       sum11 += v[jrow]*x[16*idx[jrow]+10];
2352       sum12 += v[jrow]*x[16*idx[jrow]+11];
2353       sum13 += v[jrow]*x[16*idx[jrow]+12];
2354       sum14 += v[jrow]*x[16*idx[jrow]+13];
2355       sum15 += v[jrow]*x[16*idx[jrow]+14];
2356       sum16 += v[jrow]*x[16*idx[jrow]+15];
2357       jrow++;
2358      }
2359     y[16*i]    += sum1;
2360     y[16*i+1]  += sum2;
2361     y[16*i+2]  += sum3;
2362     y[16*i+3]  += sum4;
2363     y[16*i+4]  += sum5;
2364     y[16*i+5]  += sum6;
2365     y[16*i+6]  += sum7;
2366     y[16*i+7]  += sum8;
2367     y[16*i+8]  += sum9;
2368     y[16*i+9]  += sum10;
2369     y[16*i+10] += sum11;
2370     y[16*i+11] += sum12;
2371     y[16*i+12] += sum13;
2372     y[16*i+13] += sum14;
2373     y[16*i+14] += sum15;
2374     y[16*i+15] += sum16;
2375   }
2376 
2377   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2378   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2379   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2385 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2386 {
2387   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2388   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2389   const PetscScalar *x,*v;
2390   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2391   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2392   PetscErrorCode    ierr;
2393   const PetscInt    m = b->AIJ->rmap->n,*idx;
2394   PetscInt          n,i;
2395 
2396   PetscFunctionBegin;
2397   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2398   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2399   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2400   for (i=0; i<m; i++) {
2401     idx    = a->j + a->i[i] ;
2402     v      = a->a + a->i[i] ;
2403     n      = a->i[i+1] - a->i[i];
2404     alpha1 = x[16*i];
2405     alpha2 = x[16*i+1];
2406     alpha3 = x[16*i+2];
2407     alpha4 = x[16*i+3];
2408     alpha5 = x[16*i+4];
2409     alpha6 = x[16*i+5];
2410     alpha7 = x[16*i+6];
2411     alpha8 = x[16*i+7];
2412     alpha9  = x[16*i+8];
2413     alpha10 = x[16*i+9];
2414     alpha11 = x[16*i+10];
2415     alpha12 = x[16*i+11];
2416     alpha13 = x[16*i+12];
2417     alpha14 = x[16*i+13];
2418     alpha15 = x[16*i+14];
2419     alpha16 = x[16*i+15];
2420     while (n-->0) {
2421       y[16*(*idx)]   += alpha1*(*v);
2422       y[16*(*idx)+1] += alpha2*(*v);
2423       y[16*(*idx)+2] += alpha3*(*v);
2424       y[16*(*idx)+3] += alpha4*(*v);
2425       y[16*(*idx)+4] += alpha5*(*v);
2426       y[16*(*idx)+5] += alpha6*(*v);
2427       y[16*(*idx)+6] += alpha7*(*v);
2428       y[16*(*idx)+7] += alpha8*(*v);
2429       y[16*(*idx)+8]  += alpha9*(*v);
2430       y[16*(*idx)+9]  += alpha10*(*v);
2431       y[16*(*idx)+10] += alpha11*(*v);
2432       y[16*(*idx)+11] += alpha12*(*v);
2433       y[16*(*idx)+12] += alpha13*(*v);
2434       y[16*(*idx)+13] += alpha14*(*v);
2435       y[16*(*idx)+14] += alpha15*(*v);
2436       y[16*(*idx)+15] += alpha16*(*v);
2437       idx++; v++;
2438     }
2439   }
2440   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2441   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2442   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 /*--------------------------------------------------------------------------------------------*/
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2449 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2450 {
2451   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2452   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2453   const PetscScalar *x,*v;
2454   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2455   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2456   PetscErrorCode    ierr;
2457   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2458   PetscInt          nonzerorow=0,n,i,jrow,j;
2459 
2460   PetscFunctionBegin;
2461   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2462   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2463   idx  = a->j;
2464   v    = a->a;
2465   ii   = a->i;
2466 
2467   for (i=0; i<m; i++) {
2468     jrow = ii[i];
2469     n    = ii[i+1] - jrow;
2470     sum1  = 0.0;
2471     sum2  = 0.0;
2472     sum3  = 0.0;
2473     sum4  = 0.0;
2474     sum5  = 0.0;
2475     sum6  = 0.0;
2476     sum7  = 0.0;
2477     sum8  = 0.0;
2478     sum9  = 0.0;
2479     sum10 = 0.0;
2480     sum11 = 0.0;
2481     sum12 = 0.0;
2482     sum13 = 0.0;
2483     sum14 = 0.0;
2484     sum15 = 0.0;
2485     sum16 = 0.0;
2486     sum17 = 0.0;
2487     sum18 = 0.0;
2488     nonzerorow += (n>0);
2489     for (j=0; j<n; j++) {
2490       sum1  += v[jrow]*x[18*idx[jrow]];
2491       sum2  += v[jrow]*x[18*idx[jrow]+1];
2492       sum3  += v[jrow]*x[18*idx[jrow]+2];
2493       sum4  += v[jrow]*x[18*idx[jrow]+3];
2494       sum5  += v[jrow]*x[18*idx[jrow]+4];
2495       sum6  += v[jrow]*x[18*idx[jrow]+5];
2496       sum7  += v[jrow]*x[18*idx[jrow]+6];
2497       sum8  += v[jrow]*x[18*idx[jrow]+7];
2498       sum9  += v[jrow]*x[18*idx[jrow]+8];
2499       sum10 += v[jrow]*x[18*idx[jrow]+9];
2500       sum11 += v[jrow]*x[18*idx[jrow]+10];
2501       sum12 += v[jrow]*x[18*idx[jrow]+11];
2502       sum13 += v[jrow]*x[18*idx[jrow]+12];
2503       sum14 += v[jrow]*x[18*idx[jrow]+13];
2504       sum15 += v[jrow]*x[18*idx[jrow]+14];
2505       sum16 += v[jrow]*x[18*idx[jrow]+15];
2506       sum17 += v[jrow]*x[18*idx[jrow]+16];
2507       sum18 += v[jrow]*x[18*idx[jrow]+17];
2508       jrow++;
2509      }
2510     y[18*i]    = sum1;
2511     y[18*i+1]  = sum2;
2512     y[18*i+2]  = sum3;
2513     y[18*i+3]  = sum4;
2514     y[18*i+4]  = sum5;
2515     y[18*i+5]  = sum6;
2516     y[18*i+6]  = sum7;
2517     y[18*i+7]  = sum8;
2518     y[18*i+8]  = sum9;
2519     y[18*i+9]  = sum10;
2520     y[18*i+10] = sum11;
2521     y[18*i+11] = sum12;
2522     y[18*i+12] = sum13;
2523     y[18*i+13] = sum14;
2524     y[18*i+14] = sum15;
2525     y[18*i+15] = sum16;
2526     y[18*i+16] = sum17;
2527     y[18*i+17] = sum18;
2528   }
2529 
2530   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2531   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2532   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2538 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2539 {
2540   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2541   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2542   const PetscScalar *x,*v;
2543   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2544   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2545   PetscErrorCode    ierr;
2546   const PetscInt    m = b->AIJ->rmap->n,*idx;
2547   PetscInt          n,i;
2548 
2549   PetscFunctionBegin;
2550   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2551   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2552   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2553 
2554   for (i=0; i<m; i++) {
2555     idx    = a->j + a->i[i] ;
2556     v      = a->a + a->i[i] ;
2557     n      = a->i[i+1] - a->i[i];
2558     alpha1  = x[18*i];
2559     alpha2  = x[18*i+1];
2560     alpha3  = x[18*i+2];
2561     alpha4  = x[18*i+3];
2562     alpha5  = x[18*i+4];
2563     alpha6  = x[18*i+5];
2564     alpha7  = x[18*i+6];
2565     alpha8  = x[18*i+7];
2566     alpha9  = x[18*i+8];
2567     alpha10 = x[18*i+9];
2568     alpha11 = x[18*i+10];
2569     alpha12 = x[18*i+11];
2570     alpha13 = x[18*i+12];
2571     alpha14 = x[18*i+13];
2572     alpha15 = x[18*i+14];
2573     alpha16 = x[18*i+15];
2574     alpha17 = x[18*i+16];
2575     alpha18 = x[18*i+17];
2576     while (n-->0) {
2577       y[18*(*idx)]    += alpha1*(*v);
2578       y[18*(*idx)+1]  += alpha2*(*v);
2579       y[18*(*idx)+2]  += alpha3*(*v);
2580       y[18*(*idx)+3]  += alpha4*(*v);
2581       y[18*(*idx)+4]  += alpha5*(*v);
2582       y[18*(*idx)+5]  += alpha6*(*v);
2583       y[18*(*idx)+6]  += alpha7*(*v);
2584       y[18*(*idx)+7]  += alpha8*(*v);
2585       y[18*(*idx)+8]  += alpha9*(*v);
2586       y[18*(*idx)+9]  += alpha10*(*v);
2587       y[18*(*idx)+10] += alpha11*(*v);
2588       y[18*(*idx)+11] += alpha12*(*v);
2589       y[18*(*idx)+12] += alpha13*(*v);
2590       y[18*(*idx)+13] += alpha14*(*v);
2591       y[18*(*idx)+14] += alpha15*(*v);
2592       y[18*(*idx)+15] += alpha16*(*v);
2593       y[18*(*idx)+16] += alpha17*(*v);
2594       y[18*(*idx)+17] += alpha18*(*v);
2595       idx++; v++;
2596     }
2597   }
2598   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2599   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2600   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 #undef __FUNCT__
2605 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2606 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607 {
2608   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2609   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2610   const PetscScalar *x,*v;
2611   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2612   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2613   PetscErrorCode    ierr;
2614   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2615   PetscInt          n,i,jrow,j;
2616 
2617   PetscFunctionBegin;
2618   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2619   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2620   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2621   idx  = a->j;
2622   v    = a->a;
2623   ii   = a->i;
2624 
2625   for (i=0; i<m; i++) {
2626     jrow = ii[i];
2627     n    = ii[i+1] - jrow;
2628     sum1  = 0.0;
2629     sum2  = 0.0;
2630     sum3  = 0.0;
2631     sum4  = 0.0;
2632     sum5  = 0.0;
2633     sum6  = 0.0;
2634     sum7  = 0.0;
2635     sum8  = 0.0;
2636     sum9  = 0.0;
2637     sum10 = 0.0;
2638     sum11 = 0.0;
2639     sum12 = 0.0;
2640     sum13 = 0.0;
2641     sum14 = 0.0;
2642     sum15 = 0.0;
2643     sum16 = 0.0;
2644     sum17 = 0.0;
2645     sum18 = 0.0;
2646     for (j=0; j<n; j++) {
2647       sum1  += v[jrow]*x[18*idx[jrow]];
2648       sum2  += v[jrow]*x[18*idx[jrow]+1];
2649       sum3  += v[jrow]*x[18*idx[jrow]+2];
2650       sum4  += v[jrow]*x[18*idx[jrow]+3];
2651       sum5  += v[jrow]*x[18*idx[jrow]+4];
2652       sum6  += v[jrow]*x[18*idx[jrow]+5];
2653       sum7  += v[jrow]*x[18*idx[jrow]+6];
2654       sum8  += v[jrow]*x[18*idx[jrow]+7];
2655       sum9  += v[jrow]*x[18*idx[jrow]+8];
2656       sum10 += v[jrow]*x[18*idx[jrow]+9];
2657       sum11 += v[jrow]*x[18*idx[jrow]+10];
2658       sum12 += v[jrow]*x[18*idx[jrow]+11];
2659       sum13 += v[jrow]*x[18*idx[jrow]+12];
2660       sum14 += v[jrow]*x[18*idx[jrow]+13];
2661       sum15 += v[jrow]*x[18*idx[jrow]+14];
2662       sum16 += v[jrow]*x[18*idx[jrow]+15];
2663       sum17 += v[jrow]*x[18*idx[jrow]+16];
2664       sum18 += v[jrow]*x[18*idx[jrow]+17];
2665       jrow++;
2666      }
2667     y[18*i]    += sum1;
2668     y[18*i+1]  += sum2;
2669     y[18*i+2]  += sum3;
2670     y[18*i+3]  += sum4;
2671     y[18*i+4]  += sum5;
2672     y[18*i+5]  += sum6;
2673     y[18*i+6]  += sum7;
2674     y[18*i+7]  += sum8;
2675     y[18*i+8]  += sum9;
2676     y[18*i+9]  += sum10;
2677     y[18*i+10] += sum11;
2678     y[18*i+11] += sum12;
2679     y[18*i+12] += sum13;
2680     y[18*i+13] += sum14;
2681     y[18*i+14] += sum15;
2682     y[18*i+15] += sum16;
2683     y[18*i+16] += sum17;
2684     y[18*i+17] += sum18;
2685   }
2686 
2687   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2688   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2689   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2690   PetscFunctionReturn(0);
2691 }
2692 
2693 #undef __FUNCT__
2694 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2695 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2696 {
2697   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2698   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2699   const PetscScalar *x,*v;
2700   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2701   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2702   PetscErrorCode    ierr;
2703   const PetscInt    m = b->AIJ->rmap->n,*idx;
2704   PetscInt          n,i;
2705 
2706   PetscFunctionBegin;
2707   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2708   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2709   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2710   for (i=0; i<m; i++) {
2711     idx    = a->j + a->i[i] ;
2712     v      = a->a + a->i[i] ;
2713     n      = a->i[i+1] - a->i[i];
2714     alpha1 = x[18*i];
2715     alpha2 = x[18*i+1];
2716     alpha3 = x[18*i+2];
2717     alpha4 = x[18*i+3];
2718     alpha5 = x[18*i+4];
2719     alpha6 = x[18*i+5];
2720     alpha7 = x[18*i+6];
2721     alpha8 = x[18*i+7];
2722     alpha9  = x[18*i+8];
2723     alpha10 = x[18*i+9];
2724     alpha11 = x[18*i+10];
2725     alpha12 = x[18*i+11];
2726     alpha13 = x[18*i+12];
2727     alpha14 = x[18*i+13];
2728     alpha15 = x[18*i+14];
2729     alpha16 = x[18*i+15];
2730     alpha17 = x[18*i+16];
2731     alpha18 = x[18*i+17];
2732     while (n-->0) {
2733       y[18*(*idx)]   += alpha1*(*v);
2734       y[18*(*idx)+1] += alpha2*(*v);
2735       y[18*(*idx)+2] += alpha3*(*v);
2736       y[18*(*idx)+3] += alpha4*(*v);
2737       y[18*(*idx)+4] += alpha5*(*v);
2738       y[18*(*idx)+5] += alpha6*(*v);
2739       y[18*(*idx)+6] += alpha7*(*v);
2740       y[18*(*idx)+7] += alpha8*(*v);
2741       y[18*(*idx)+8]  += alpha9*(*v);
2742       y[18*(*idx)+9]  += alpha10*(*v);
2743       y[18*(*idx)+10] += alpha11*(*v);
2744       y[18*(*idx)+11] += alpha12*(*v);
2745       y[18*(*idx)+12] += alpha13*(*v);
2746       y[18*(*idx)+13] += alpha14*(*v);
2747       y[18*(*idx)+14] += alpha15*(*v);
2748       y[18*(*idx)+15] += alpha16*(*v);
2749       y[18*(*idx)+16] += alpha17*(*v);
2750       y[18*(*idx)+17] += alpha18*(*v);
2751       idx++; v++;
2752     }
2753   }
2754   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2755   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2756   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 #undef __FUNCT__
2761 #define __FUNCT__ "MatMult_SeqMAIJ_N"
2762 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2763 {
2764   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2765   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2766   const PetscScalar *x,*v;
2767   PetscScalar       *y,*sums;
2768   PetscErrorCode    ierr;
2769   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2770   PetscInt          n,i,jrow,j,dof = b->dof,k;
2771 
2772   PetscFunctionBegin;
2773   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2774   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2775   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2776   idx  = a->j;
2777   v    = a->a;
2778   ii   = a->i;
2779 
2780   for (i=0; i<m; i++) {
2781     jrow = ii[i];
2782     n    = ii[i+1] - jrow;
2783     sums = y + dof*i;
2784     for (j=0; j<n; j++) {
2785       for (k=0; k<dof; k++) {
2786         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2787       }
2788       jrow++;
2789     }
2790   }
2791 
2792   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2793   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2794   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
2800 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2801 {
2802   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2803   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2804   const PetscScalar *x,*v;
2805   PetscScalar       *y,*sums;
2806   PetscErrorCode    ierr;
2807   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2808   PetscInt          n,i,jrow,j,dof = b->dof,k;
2809 
2810   PetscFunctionBegin;
2811   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2812   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2813   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2814   idx  = a->j;
2815   v    = a->a;
2816   ii   = a->i;
2817 
2818   for (i=0; i<m; i++) {
2819     jrow = ii[i];
2820     n    = ii[i+1] - jrow;
2821     sums = y + dof*i;
2822     for (j=0; j<n; j++) {
2823       for (k=0; k<dof; k++) {
2824         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2825       }
2826       jrow++;
2827     }
2828   }
2829 
2830   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2831   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2832   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
2838 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2839 {
2840   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2841   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2842   const PetscScalar *x,*v,*alpha;
2843   PetscScalar       *y;
2844   PetscErrorCode    ierr;
2845   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2846   PetscInt          n,i,k;
2847 
2848   PetscFunctionBegin;
2849   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2850   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2851   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2852   for (i=0; i<m; i++) {
2853     idx    = a->j + a->i[i] ;
2854     v      = a->a + a->i[i] ;
2855     n      = a->i[i+1] - a->i[i];
2856     alpha  = x + dof*i;
2857     while (n-->0) {
2858       for (k=0; k<dof; k++) {
2859         y[dof*(*idx)+k] += alpha[k]*(*v);
2860       }
2861       idx++; v++;
2862     }
2863   }
2864   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2865   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2866   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
2872 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2873 {
2874   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2875   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2876   const PetscScalar *x,*v,*alpha;
2877   PetscScalar       *y;
2878   PetscErrorCode    ierr;
2879   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2880   PetscInt          n,i,k;
2881 
2882   PetscFunctionBegin;
2883   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2884   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2885   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2886   for (i=0; i<m; i++) {
2887     idx    = a->j + a->i[i] ;
2888     v      = a->a + a->i[i] ;
2889     n      = a->i[i+1] - a->i[i];
2890     alpha  = x + dof*i;
2891     while (n-->0) {
2892       for (k=0; k<dof; k++) {
2893         y[dof*(*idx)+k] += alpha[k]*(*v);
2894       }
2895       idx++; v++;
2896     }
2897   }
2898   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2899   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2900   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 /*===================================================================================*/
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2907 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2908 {
2909   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2910   PetscErrorCode ierr;
2911 
2912   PetscFunctionBegin;
2913   /* start the scatter */
2914   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2915   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2916   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2917   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 #undef __FUNCT__
2922 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2923 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2924 {
2925   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2926   PetscErrorCode ierr;
2927 
2928   PetscFunctionBegin;
2929   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2930   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2931   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2932   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 #undef __FUNCT__
2937 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2938 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2939 {
2940   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2941   PetscErrorCode ierr;
2942 
2943   PetscFunctionBegin;
2944   /* start the scatter */
2945   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2946   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2947   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2948   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2954 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2955 {
2956   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2957   PetscErrorCode ierr;
2958 
2959   PetscFunctionBegin;
2960   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2961   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2962   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2963   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 /* ----------------------------------------------------------------*/
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2970 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2971 {
2972   PetscErrorCode     ierr;
2973   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2974   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2975   Mat                P=pp->AIJ;
2976   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2977   PetscInt           *pti,*ptj,*ptJ;
2978   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2979   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2980   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2981   MatScalar          *ca;
2982   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2983 
2984   PetscFunctionBegin;
2985   /* Get ij structure of P^T */
2986   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2987 
2988   cn = pn*ppdof;
2989   /* Allocate ci array, arrays for fill computation and */
2990   /* free space for accumulating nonzero column info */
2991   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2992   ci[0] = 0;
2993 
2994   /* Work arrays for rows of P^T*A */
2995   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
2996   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2997   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
2998 
2999   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3000   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3001   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3002   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
3003   current_space = free_space;
3004 
3005   /* Determine symbolic info for each row of C: */
3006   for (i=0;i<pn;i++) {
3007     ptnzi  = pti[i+1] - pti[i];
3008     ptJ    = ptj + pti[i];
3009     for (dof=0;dof<ppdof;dof++) {
3010       ptanzi = 0;
3011       /* Determine symbolic row of PtA: */
3012       for (j=0;j<ptnzi;j++) {
3013         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3014         arow = ptJ[j]*ppdof + dof;
3015         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3016         anzj = ai[arow+1] - ai[arow];
3017         ajj  = aj + ai[arow];
3018         for (k=0;k<anzj;k++) {
3019           if (!ptadenserow[ajj[k]]) {
3020             ptadenserow[ajj[k]]    = -1;
3021             ptasparserow[ptanzi++] = ajj[k];
3022           }
3023         }
3024       }
3025       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3026       ptaj = ptasparserow;
3027       cnzi   = 0;
3028       for (j=0;j<ptanzi;j++) {
3029         /* Get offset within block of P */
3030         pshift = *ptaj%ppdof;
3031         /* Get block row of P */
3032         prow = (*ptaj++)/ppdof; /* integer division */
3033         /* P has same number of nonzeros per row as the compressed form */
3034         pnzj = pi[prow+1] - pi[prow];
3035         pjj  = pj + pi[prow];
3036         for (k=0;k<pnzj;k++) {
3037           /* Locations in C are shifted by the offset within the block */
3038           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3039           if (!denserow[pjj[k]*ppdof+pshift]) {
3040             denserow[pjj[k]*ppdof+pshift] = -1;
3041             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3042           }
3043         }
3044       }
3045 
3046       /* sort sparserow */
3047       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3048 
3049       /* If free space is not available, make more free space */
3050       /* Double the amount of total space in the list */
3051       if (current_space->local_remaining<cnzi) {
3052         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
3053       }
3054 
3055       /* Copy data into free space, and zero out denserows */
3056       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
3057       current_space->array           += cnzi;
3058       current_space->local_used      += cnzi;
3059       current_space->local_remaining -= cnzi;
3060 
3061       for (j=0;j<ptanzi;j++) {
3062         ptadenserow[ptasparserow[j]] = 0;
3063       }
3064       for (j=0;j<cnzi;j++) {
3065         denserow[sparserow[j]] = 0;
3066       }
3067       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3068       /*        For now, we will recompute what is needed. */
3069       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3070     }
3071   }
3072   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3073   /* Allocate space for cj, initialize cj, and */
3074   /* destroy list of free space and other temporary array(s) */
3075   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3076   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3077   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3078 
3079   /* Allocate space for ca */
3080   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
3081   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3082 
3083   /* put together the new matrix */
3084   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3085 
3086   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3087   /* Since these are PETSc arrays, change flags to free them as necessary. */
3088   c          = (Mat_SeqAIJ *)((*C)->data);
3089   c->free_a  = PETSC_TRUE;
3090   c->free_ij = PETSC_TRUE;
3091   c->nonew   = 0;
3092   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3093 
3094   /* Clean up. */
3095   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
3101 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3102 {
3103   /* This routine requires testing -- first draft only */
3104   PetscErrorCode  ierr;
3105   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3106   Mat             P=pp->AIJ;
3107   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
3108   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
3109   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3110   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3111   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3112   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3113   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3114   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3115   MatScalar       *ca=c->a,*caj,*apa;
3116 
3117   PetscFunctionBegin;
3118   /* Allocate temporary array for storage of one row of A*P */
3119   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
3120   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
3121   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
3122   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
3123 
3124   /* Clear old values in C */
3125   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3126 
3127   for (i=0;i<am;i++) {
3128     /* Form sparse row of A*P */
3129     anzi  = ai[i+1] - ai[i];
3130     apnzj = 0;
3131     for (j=0;j<anzi;j++) {
3132       /* Get offset within block of P */
3133       pshift = *aj%ppdof;
3134       /* Get block row of P */
3135       prow   = *aj++/ppdof; /* integer division */
3136       pnzj = pi[prow+1] - pi[prow];
3137       pjj  = pj + pi[prow];
3138       paj  = pa + pi[prow];
3139       for (k=0;k<pnzj;k++) {
3140         poffset = pjj[k]*ppdof+pshift;
3141         if (!apjdense[poffset]) {
3142           apjdense[poffset] = -1;
3143           apj[apnzj++]      = poffset;
3144         }
3145         apa[poffset] += (*aa)*paj[k];
3146       }
3147       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3148       aa++;
3149     }
3150 
3151     /* Sort the j index array for quick sparse axpy. */
3152     /* Note: a array does not need sorting as it is in dense storage locations. */
3153     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3154 
3155     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3156     prow    = i/ppdof; /* integer division */
3157     pshift  = i%ppdof;
3158     poffset = pi[prow];
3159     pnzi = pi[prow+1] - poffset;
3160     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3161     pJ   = pj+poffset;
3162     pA   = pa+poffset;
3163     for (j=0;j<pnzi;j++) {
3164       crow   = (*pJ)*ppdof+pshift;
3165       cjj    = cj + ci[crow];
3166       caj    = ca + ci[crow];
3167       pJ++;
3168       /* Perform sparse axpy operation.  Note cjj includes apj. */
3169       for (k=0,nextap=0;nextap<apnzj;k++) {
3170         if (cjj[k]==apj[nextap]) {
3171           caj[k] += (*pA)*apa[apj[nextap++]];
3172         }
3173       }
3174       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3175       pA++;
3176     }
3177 
3178     /* Zero the current row info for A*P */
3179     for (j=0;j<apnzj;j++) {
3180       apa[apj[j]]      = 0.;
3181       apjdense[apj[j]] = 0;
3182     }
3183   }
3184 
3185   /* Assemble the final matrix and clean up */
3186   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3187   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3188   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3189   PetscFunctionReturn(0);
3190 }
3191 
3192 #undef __FUNCT__
3193 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ"
3194 PETSC_EXTERN_C PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3195 {
3196   PetscErrorCode ierr;
3197 
3198   PetscFunctionBegin;
3199   if (scall == MAT_INITIAL_MATRIX) {
3200     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3201   }
3202   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3203   PetscFunctionReturn(0);
3204 }
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3208 PETSC_EXTERN_C PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3209 {
3210   PetscErrorCode    ierr;
3211 
3212   PetscFunctionBegin;
3213   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3214   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
3215   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3221 PETSC_EXTERN_C PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3222 {
3223   PetscFunctionBegin;
3224   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ"
3230 PETSC_EXTERN_C PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3231 {
3232   PetscErrorCode ierr;
3233 
3234   PetscFunctionBegin;
3235   if (scall == MAT_INITIAL_MATRIX) {
3236     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3237   }
3238   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3244 PETSC_EXTERN_C PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3245 {
3246   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3247   Mat               a = b->AIJ,B;
3248   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
3249   PetscErrorCode    ierr;
3250   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3251   PetscInt          *cols;
3252   PetscScalar       *vals;
3253 
3254   PetscFunctionBegin;
3255   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3256   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
3257   for (i=0; i<m; i++) {
3258     nmax = PetscMax(nmax,aij->ilen[i]);
3259     for (j=0; j<dof; j++) {
3260       ilen[dof*i+j] = aij->ilen[i];
3261     }
3262   }
3263   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3264   ierr = PetscFree(ilen);CHKERRQ(ierr);
3265   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
3266   ii   = 0;
3267   for (i=0; i<m; i++) {
3268     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3269     for (j=0; j<dof; j++) {
3270       for (k=0; k<ncols; k++) {
3271         icols[k] = dof*cols[k]+j;
3272       }
3273       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3274       ii++;
3275     }
3276     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3277   }
3278   ierr = PetscFree(icols);CHKERRQ(ierr);
3279   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3280   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3281 
3282   if (reuse == MAT_REUSE_MATRIX) {
3283     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3284   } else {
3285     *newmat = B;
3286   }
3287   PetscFunctionReturn(0);
3288 }
3289 
3290 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3294 PETSC_EXTERN_C PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3295 {
3296   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3297   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3298   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3299   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3300   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3301   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3302   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3303   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3304   PetscInt          rstart,cstart,*garray,ii,k;
3305   PetscErrorCode    ierr;
3306   PetscScalar       *vals,*ovals;
3307 
3308   PetscFunctionBegin;
3309   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3310   for (i=0; i<A->rmap->n/dof; i++) {
3311     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3312     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3313     for (j=0; j<dof; j++) {
3314       dnz[dof*i+j] = AIJ->ilen[i];
3315       onz[dof*i+j] = OAIJ->ilen[i];
3316     }
3317   }
3318   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3319   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3320 
3321   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3322   rstart = dof*maij->A->rmap->rstart;
3323   cstart = dof*maij->A->cmap->rstart;
3324   garray = mpiaij->garray;
3325 
3326   ii = rstart;
3327   for (i=0; i<A->rmap->n/dof; i++) {
3328     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3329     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3330     for (j=0; j<dof; j++) {
3331       for (k=0; k<ncols; k++) {
3332         icols[k] = cstart + dof*cols[k]+j;
3333       }
3334       for (k=0; k<oncols; k++) {
3335         oicols[k] = dof*garray[ocols[k]]+j;
3336       }
3337       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3338       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3339       ii++;
3340     }
3341     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3342     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3343   }
3344   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3345 
3346   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3347   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348 
3349   if (reuse == MAT_REUSE_MATRIX) {
3350     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3351     ((PetscObject)A)->refct = 1;
3352     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3353     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3354   } else {
3355     *newmat = B;
3356   }
3357   PetscFunctionReturn(0);
3358 }
3359 
3360 #undef __FUNCT__
3361 #define __FUNCT__ "MatGetSubMatrix_MAIJ"
3362 PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3363 {
3364   PetscErrorCode ierr;
3365   Mat            A;
3366 
3367   PetscFunctionBegin;
3368   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3369   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3370   ierr = MatDestroy(&A);CHKERRQ(ierr);
3371   PetscFunctionReturn(0);
3372 }
3373 
3374 /* ---------------------------------------------------------------------------------- */
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatCreateMAIJ"
3377 /*@C
3378   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3379   operations for multicomponent problems.  It interpolates each component the same
3380   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3381   and MATMPIAIJ for distributed matrices.
3382 
3383   Collective
3384 
3385   Input Parameters:
3386 + A - the AIJ matrix describing the action on blocks
3387 - dof - the block size (number of components per node)
3388 
3389   Output Parameter:
3390 . maij - the new MAIJ matrix
3391 
3392   Operations provided:
3393 + MatMult
3394 . MatMultTranspose
3395 . MatMultAdd
3396 . MatMultTransposeAdd
3397 - MatView
3398 
3399   Level: advanced
3400 
3401 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3402 @*/
3403 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3404 {
3405   PetscErrorCode ierr;
3406   PetscMPIInt    size;
3407   PetscInt       n;
3408   Mat            B;
3409 
3410   PetscFunctionBegin;
3411   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3412 
3413   if (dof == 1) {
3414     *maij = A;
3415   } else {
3416     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3417     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3418     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3419     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3420     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3421     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3422     B->assembled    = PETSC_TRUE;
3423 
3424     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3425     if (size == 1) {
3426       Mat_SeqMAIJ    *b;
3427 
3428       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3429       B->ops->setup   = PETSC_NULL;
3430       B->ops->destroy = MatDestroy_SeqMAIJ;
3431       B->ops->view    = MatView_SeqMAIJ;
3432       b      = (Mat_SeqMAIJ*)B->data;
3433       b->dof = dof;
3434       b->AIJ = A;
3435       if (dof == 2) {
3436         B->ops->mult             = MatMult_SeqMAIJ_2;
3437         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3438         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3439         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3440       } else if (dof == 3) {
3441         B->ops->mult             = MatMult_SeqMAIJ_3;
3442         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3443         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3444         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3445       } else if (dof == 4) {
3446         B->ops->mult             = MatMult_SeqMAIJ_4;
3447         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3448         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3449         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3450       } else if (dof == 5) {
3451         B->ops->mult             = MatMult_SeqMAIJ_5;
3452         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3453         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3454         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3455       } else if (dof == 6) {
3456         B->ops->mult             = MatMult_SeqMAIJ_6;
3457         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3458         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3459         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3460       } else if (dof == 7) {
3461         B->ops->mult             = MatMult_SeqMAIJ_7;
3462         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3463         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3464         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3465       } else if (dof == 8) {
3466         B->ops->mult             = MatMult_SeqMAIJ_8;
3467         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3468         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3469         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3470       } else if (dof == 9) {
3471         B->ops->mult             = MatMult_SeqMAIJ_9;
3472         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3473         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3474         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3475       } else if (dof == 10) {
3476         B->ops->mult             = MatMult_SeqMAIJ_10;
3477         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3478         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3479         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3480       } else if (dof == 11) {
3481         B->ops->mult             = MatMult_SeqMAIJ_11;
3482         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3483         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3484         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3485       } else if (dof == 16) {
3486         B->ops->mult             = MatMult_SeqMAIJ_16;
3487         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3488         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3489         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3490       } else if (dof == 18) {
3491         B->ops->mult             = MatMult_SeqMAIJ_18;
3492         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3493         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3494         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3495       } else {
3496         B->ops->mult             = MatMult_SeqMAIJ_N;
3497         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3498         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3499         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3500       }
3501       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3502       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_seqaij_seqmaij_C","MatPtAP_SeqAIJ_SeqMAIJ",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3503     } else {
3504       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3505       Mat_MPIMAIJ *b;
3506       IS          from,to;
3507       Vec         gvec;
3508 
3509       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3510       B->ops->setup   = PETSC_NULL;
3511       B->ops->destroy = MatDestroy_MPIMAIJ;
3512       B->ops->view    = MatView_MPIMAIJ;
3513       b      = (Mat_MPIMAIJ*)B->data;
3514       b->dof = dof;
3515       b->A   = A;
3516       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3517       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3518 
3519       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3520       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3521       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3522       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3523       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3524 
3525       /* create two temporary Index sets for build scatter gather */
3526       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3527       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3528 
3529       /* create temporary global vector to generate scatter context */
3530       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3531 
3532       /* generate the scatter context */
3533       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3534 
3535       ierr = ISDestroy(&from);CHKERRQ(ierr);
3536       ierr = ISDestroy(&to);CHKERRQ(ierr);
3537       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3538 
3539       B->ops->mult                = MatMult_MPIMAIJ_dof;
3540       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
3541       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
3542       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
3543       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3544       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C","MatPtAP_MPIAIJ_MPIMAIJ",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3545     }
3546     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
3547     ierr = MatSetUp(B);CHKERRQ(ierr);
3548     *maij = B;
3549     ierr = MatViewFromOptions(B,"-mat_view");CHKERRQ(ierr);
3550   }
3551   PetscFunctionReturn(0);
3552 }
3553