xref: /petsc/src/mat/impls/maij/maij.c (revision f3fe499b4cc4d64bf04aa4f5e4963dcc4eb56541)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/
21 #include "../src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 /*@C
27    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28 
29    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30 
31    Input Parameter:
32 .  A - the MAIJ matrix
33 
34    Output Parameter:
35 .  B - the AIJ matrix
36 
37    Level: advanced
38 
39    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40 
41 .seealso: MatCreateMAIJ()
42 @*/
43 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
44 {
45   PetscErrorCode ierr;
46   PetscTruth     ismpimaij,isseqmaij;
47 
48   PetscFunctionBegin;
49   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
50   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
51   if (ismpimaij) {
52     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53 
54     *B = b->A;
55   } else if (isseqmaij) {
56     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57 
58     *B = b->AIJ;
59   } else {
60     *B = A;
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatMAIJRedimension"
67 /*@C
68    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69 
70    Logically Collective
71 
72    Input Parameter:
73 +  A - the MAIJ matrix
74 -  dof - the block size for the new matrix
75 
76    Output Parameter:
77 .  B - the new MAIJ matrix
78 
79    Level: advanced
80 
81 .seealso: MatCreateMAIJ()
82 @*/
83 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
84 {
85   PetscErrorCode ierr;
86   Mat            Aij = PETSC_NULL;
87 
88   PetscFunctionBegin;
89   PetscValidLogicalCollectiveInt(A,dof,2);
90   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
91   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatDestroy_SeqMAIJ"
97 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
98 {
99   PetscErrorCode ierr;
100   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
101 
102   PetscFunctionBegin;
103   if (b->AIJ) {
104     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
105   }
106   ierr = PetscFree(b);CHKERRQ(ierr);
107   PetscFunctionReturn(0);
108 }
109 
110 #undef __FUNCT__
111 #define __FUNCT__ "MatView_SeqMAIJ"
112 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
113 {
114   PetscErrorCode ierr;
115   Mat            B;
116 
117   PetscFunctionBegin;
118   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
119   ierr = MatView(B,viewer);CHKERRQ(ierr);
120   ierr = MatDestroy(B);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatView_MPIMAIJ"
126 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
127 {
128   PetscErrorCode ierr;
129   Mat            B;
130 
131   PetscFunctionBegin;
132   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
133   ierr = MatView(B,viewer);CHKERRQ(ierr);
134   ierr = MatDestroy(B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatDestroy_MPIMAIJ"
140 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
141 {
142   PetscErrorCode ierr;
143   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
144 
145   PetscFunctionBegin;
146   if (b->AIJ) {
147     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
148   }
149   if (b->OAIJ) {
150     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
151   }
152   if (b->A) {
153     ierr = MatDestroy(b->A);CHKERRQ(ierr);
154   }
155   if (b->ctx) {
156     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
157   }
158   if (b->w) {
159     ierr = VecDestroy(b->w);CHKERRQ(ierr);
160   }
161   ierr = PetscFree(b);CHKERRQ(ierr);
162   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 /*MC
167   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168   multicomponent problems, interpolating or restricting each component the same way independently.
169   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
170 
171   Operations provided:
172 . MatMult
173 . MatMultTranspose
174 . MatMultAdd
175 . MatMultTransposeAdd
176 
177   Level: advanced
178 
179 .seealso: MatCreateSeqDense
180 M*/
181 
182 EXTERN_C_BEGIN
183 #undef __FUNCT__
184 #define __FUNCT__ "MatCreate_MAIJ"
185 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
186 {
187   PetscErrorCode ierr;
188   Mat_MPIMAIJ    *b;
189   PetscMPIInt    size;
190 
191   PetscFunctionBegin;
192   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
193   A->data  = (void*)b;
194   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
195   A->mapping          = 0;
196 
197   b->AIJ  = 0;
198   b->dof  = 0;
199   b->OAIJ = 0;
200   b->ctx  = 0;
201   b->w    = 0;
202   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
203   if (size == 1){
204     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
205   } else {
206     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
207   }
208   PetscFunctionReturn(0);
209 }
210 EXTERN_C_END
211 
212 /* --------------------------------------------------------------------------------------*/
213 #undef __FUNCT__
214 #define __FUNCT__ "MatMult_SeqMAIJ_2"
215 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
216 {
217   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
218   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
219   const PetscScalar *x,*v;
220   PetscScalar       *y, sum1, sum2;
221   PetscErrorCode    ierr;
222   PetscInt          nonzerorow=0,n,i,jrow,j;
223   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
224 
225   PetscFunctionBegin;
226   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
227   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
228   idx  = a->j;
229   v    = a->a;
230   ii   = a->i;
231 
232   for (i=0; i<m; i++) {
233     jrow = ii[i];
234     n    = ii[i+1] - jrow;
235     sum1  = 0.0;
236     sum2  = 0.0;
237     nonzerorow += (n>0);
238     for (j=0; j<n; j++) {
239       sum1 += v[jrow]*x[2*idx[jrow]];
240       sum2 += v[jrow]*x[2*idx[jrow]+1];
241       jrow++;
242      }
243     y[2*i]   = sum1;
244     y[2*i+1] = sum2;
245   }
246 
247   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
248   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
249   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
255 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
256 {
257   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
258   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
259   const PetscScalar *x,*v;
260   PetscScalar       *y,alpha1,alpha2;
261   PetscErrorCode    ierr;
262   PetscInt          n,i;
263   const PetscInt    m = b->AIJ->rmap->n,*idx;
264 
265   PetscFunctionBegin;
266   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
267   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
268   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
269 
270   for (i=0; i<m; i++) {
271     idx    = a->j + a->i[i] ;
272     v      = a->a + a->i[i] ;
273     n      = a->i[i+1] - a->i[i];
274     alpha1 = x[2*i];
275     alpha2 = x[2*i+1];
276     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
277   }
278   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
279   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
280   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
286 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
287 {
288   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
289   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
290   const PetscScalar *x,*v;
291   PetscScalar       *y,sum1, sum2;
292   PetscErrorCode    ierr;
293   PetscInt          n,i,jrow,j;
294   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
295 
296   PetscFunctionBegin;
297   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
298   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
299   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
300   idx  = a->j;
301   v    = a->a;
302   ii   = a->i;
303 
304   for (i=0; i<m; i++) {
305     jrow = ii[i];
306     n    = ii[i+1] - jrow;
307     sum1  = 0.0;
308     sum2  = 0.0;
309     for (j=0; j<n; j++) {
310       sum1 += v[jrow]*x[2*idx[jrow]];
311       sum2 += v[jrow]*x[2*idx[jrow]+1];
312       jrow++;
313      }
314     y[2*i]   += sum1;
315     y[2*i+1] += sum2;
316   }
317 
318   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
319   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
320   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 #undef __FUNCT__
324 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
325 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
326 {
327   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
328   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
329   const PetscScalar *x,*v;
330   PetscScalar       *y,alpha1,alpha2;
331   PetscErrorCode    ierr;
332   PetscInt          n,i;
333   const PetscInt    m = b->AIJ->rmap->n,*idx;
334 
335   PetscFunctionBegin;
336   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
337   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
338   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
339 
340   for (i=0; i<m; i++) {
341     idx   = a->j + a->i[i] ;
342     v     = a->a + a->i[i] ;
343     n     = a->i[i+1] - a->i[i];
344     alpha1 = x[2*i];
345     alpha2 = x[2*i+1];
346     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
347   }
348   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
349   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
350   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
351   PetscFunctionReturn(0);
352 }
353 /* --------------------------------------------------------------------------------------*/
354 #undef __FUNCT__
355 #define __FUNCT__ "MatMult_SeqMAIJ_3"
356 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
357 {
358   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
359   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
360   const PetscScalar *x,*v;
361   PetscScalar       *y,sum1, sum2, sum3;
362   PetscErrorCode    ierr;
363   PetscInt          nonzerorow=0,n,i,jrow,j;
364   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
365 
366   PetscFunctionBegin;
367   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
368   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
369   idx  = a->j;
370   v    = a->a;
371   ii   = a->i;
372 
373   for (i=0; i<m; i++) {
374     jrow = ii[i];
375     n    = ii[i+1] - jrow;
376     sum1  = 0.0;
377     sum2  = 0.0;
378     sum3  = 0.0;
379     nonzerorow += (n>0);
380     for (j=0; j<n; j++) {
381       sum1 += v[jrow]*x[3*idx[jrow]];
382       sum2 += v[jrow]*x[3*idx[jrow]+1];
383       sum3 += v[jrow]*x[3*idx[jrow]+2];
384       jrow++;
385      }
386     y[3*i]   = sum1;
387     y[3*i+1] = sum2;
388     y[3*i+2] = sum3;
389   }
390 
391   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
392   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
393   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
399 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
400 {
401   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
402   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
403   const PetscScalar *x,*v;
404   PetscScalar       *y,alpha1,alpha2,alpha3;
405   PetscErrorCode    ierr;
406   PetscInt          n,i;
407   const PetscInt    m = b->AIJ->rmap->n,*idx;
408 
409   PetscFunctionBegin;
410   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
411   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
412   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
413 
414   for (i=0; i<m; i++) {
415     idx    = a->j + a->i[i];
416     v      = a->a + a->i[i];
417     n      = a->i[i+1] - a->i[i];
418     alpha1 = x[3*i];
419     alpha2 = x[3*i+1];
420     alpha3 = x[3*i+2];
421     while (n-->0) {
422       y[3*(*idx)]   += alpha1*(*v);
423       y[3*(*idx)+1] += alpha2*(*v);
424       y[3*(*idx)+2] += alpha3*(*v);
425       idx++; v++;
426     }
427   }
428   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
430   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
436 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
439   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
440   const PetscScalar *x,*v;
441   PetscScalar       *y,sum1, sum2, sum3;
442   PetscErrorCode    ierr;
443   PetscInt          n,i,jrow,j;
444   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
445 
446   PetscFunctionBegin;
447   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
448   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
449   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
450   idx  = a->j;
451   v    = a->a;
452   ii   = a->i;
453 
454   for (i=0; i<m; i++) {
455     jrow = ii[i];
456     n    = ii[i+1] - jrow;
457     sum1  = 0.0;
458     sum2  = 0.0;
459     sum3  = 0.0;
460     for (j=0; j<n; j++) {
461       sum1 += v[jrow]*x[3*idx[jrow]];
462       sum2 += v[jrow]*x[3*idx[jrow]+1];
463       sum3 += v[jrow]*x[3*idx[jrow]+2];
464       jrow++;
465      }
466     y[3*i]   += sum1;
467     y[3*i+1] += sum2;
468     y[3*i+2] += sum3;
469   }
470 
471   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
472   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
473   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 #undef __FUNCT__
477 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
478 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
479 {
480   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
481   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
482   const PetscScalar *x,*v;
483   PetscScalar       *y,alpha1,alpha2,alpha3;
484   PetscErrorCode    ierr;
485   PetscInt          n,i;
486   const PetscInt    m = b->AIJ->rmap->n,*idx;
487 
488   PetscFunctionBegin;
489   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
490   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
491   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
492   for (i=0; i<m; i++) {
493     idx    = a->j + a->i[i] ;
494     v      = a->a + a->i[i] ;
495     n      = a->i[i+1] - a->i[i];
496     alpha1 = x[3*i];
497     alpha2 = x[3*i+1];
498     alpha3 = x[3*i+2];
499     while (n-->0) {
500       y[3*(*idx)]   += alpha1*(*v);
501       y[3*(*idx)+1] += alpha2*(*v);
502       y[3*(*idx)+2] += alpha3*(*v);
503       idx++; v++;
504     }
505   }
506   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
507   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
508   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 /* ------------------------------------------------------------------------------*/
513 #undef __FUNCT__
514 #define __FUNCT__ "MatMult_SeqMAIJ_4"
515 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516 {
517   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
518   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
519   const PetscScalar *x,*v;
520   PetscScalar       *y,sum1, sum2, sum3, sum4;
521   PetscErrorCode    ierr;
522   PetscInt          nonzerorow=0,n,i,jrow,j;
523   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
524 
525   PetscFunctionBegin;
526   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
527   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
528   idx  = a->j;
529   v    = a->a;
530   ii   = a->i;
531 
532   for (i=0; i<m; i++) {
533     jrow = ii[i];
534     n    = ii[i+1] - jrow;
535     sum1  = 0.0;
536     sum2  = 0.0;
537     sum3  = 0.0;
538     sum4  = 0.0;
539     nonzerorow += (n>0);
540     for (j=0; j<n; j++) {
541       sum1 += v[jrow]*x[4*idx[jrow]];
542       sum2 += v[jrow]*x[4*idx[jrow]+1];
543       sum3 += v[jrow]*x[4*idx[jrow]+2];
544       sum4 += v[jrow]*x[4*idx[jrow]+3];
545       jrow++;
546      }
547     y[4*i]   = sum1;
548     y[4*i+1] = sum2;
549     y[4*i+2] = sum3;
550     y[4*i+3] = sum4;
551   }
552 
553   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
554   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
555   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
561 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
562 {
563   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
564   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
565   const PetscScalar *x,*v;
566   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
567   PetscErrorCode    ierr;
568   PetscInt          n,i;
569   const PetscInt    m = b->AIJ->rmap->n,*idx;
570 
571   PetscFunctionBegin;
572   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
573   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
574   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
575   for (i=0; i<m; i++) {
576     idx    = a->j + a->i[i] ;
577     v      = a->a + a->i[i] ;
578     n      = a->i[i+1] - a->i[i];
579     alpha1 = x[4*i];
580     alpha2 = x[4*i+1];
581     alpha3 = x[4*i+2];
582     alpha4 = x[4*i+3];
583     while (n-->0) {
584       y[4*(*idx)]   += alpha1*(*v);
585       y[4*(*idx)+1] += alpha2*(*v);
586       y[4*(*idx)+2] += alpha3*(*v);
587       y[4*(*idx)+3] += alpha4*(*v);
588       idx++; v++;
589     }
590   }
591   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
592   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
593   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
599 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
602   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
603   const PetscScalar *x,*v;
604   PetscScalar       *y,sum1, sum2, sum3, sum4;
605   PetscErrorCode    ierr;
606   PetscInt          n,i,jrow,j;
607   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
608 
609   PetscFunctionBegin;
610   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
611   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
612   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
613   idx  = a->j;
614   v    = a->a;
615   ii   = a->i;
616 
617   for (i=0; i<m; i++) {
618     jrow = ii[i];
619     n    = ii[i+1] - jrow;
620     sum1  = 0.0;
621     sum2  = 0.0;
622     sum3  = 0.0;
623     sum4  = 0.0;
624     for (j=0; j<n; j++) {
625       sum1 += v[jrow]*x[4*idx[jrow]];
626       sum2 += v[jrow]*x[4*idx[jrow]+1];
627       sum3 += v[jrow]*x[4*idx[jrow]+2];
628       sum4 += v[jrow]*x[4*idx[jrow]+3];
629       jrow++;
630      }
631     y[4*i]   += sum1;
632     y[4*i+1] += sum2;
633     y[4*i+2] += sum3;
634     y[4*i+3] += sum4;
635   }
636 
637   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
638   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
639   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
640   PetscFunctionReturn(0);
641 }
642 #undef __FUNCT__
643 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
644 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
645 {
646   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
647   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
648   const PetscScalar *x,*v;
649   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
650   PetscErrorCode    ierr;
651   PetscInt          n,i;
652   const PetscInt    m = b->AIJ->rmap->n,*idx;
653 
654   PetscFunctionBegin;
655   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
656   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
657   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
658 
659   for (i=0; i<m; i++) {
660     idx    = a->j + a->i[i] ;
661     v      = a->a + a->i[i] ;
662     n      = a->i[i+1] - a->i[i];
663     alpha1 = x[4*i];
664     alpha2 = x[4*i+1];
665     alpha3 = x[4*i+2];
666     alpha4 = x[4*i+3];
667     while (n-->0) {
668       y[4*(*idx)]   += alpha1*(*v);
669       y[4*(*idx)+1] += alpha2*(*v);
670       y[4*(*idx)+2] += alpha3*(*v);
671       y[4*(*idx)+3] += alpha4*(*v);
672       idx++; v++;
673     }
674   }
675   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
676   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
677   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 /* ------------------------------------------------------------------------------*/
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatMult_SeqMAIJ_5"
684 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
685 {
686   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
687   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
688   const PetscScalar *x,*v;
689   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
690   PetscErrorCode    ierr;
691   PetscInt          nonzerorow=0,n,i,jrow,j;
692   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
693 
694   PetscFunctionBegin;
695   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
696   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
697   idx  = a->j;
698   v    = a->a;
699   ii   = a->i;
700 
701   for (i=0; i<m; i++) {
702     jrow = ii[i];
703     n    = ii[i+1] - jrow;
704     sum1  = 0.0;
705     sum2  = 0.0;
706     sum3  = 0.0;
707     sum4  = 0.0;
708     sum5  = 0.0;
709     nonzerorow += (n>0);
710     for (j=0; j<n; j++) {
711       sum1 += v[jrow]*x[5*idx[jrow]];
712       sum2 += v[jrow]*x[5*idx[jrow]+1];
713       sum3 += v[jrow]*x[5*idx[jrow]+2];
714       sum4 += v[jrow]*x[5*idx[jrow]+3];
715       sum5 += v[jrow]*x[5*idx[jrow]+4];
716       jrow++;
717      }
718     y[5*i]   = sum1;
719     y[5*i+1] = sum2;
720     y[5*i+2] = sum3;
721     y[5*i+3] = sum4;
722     y[5*i+4] = sum5;
723   }
724 
725   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
726   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
727   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
733 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
734 {
735   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
736   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
737   const PetscScalar *x,*v;
738   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
739   PetscErrorCode    ierr;
740   PetscInt          n,i;
741   const PetscInt    m = b->AIJ->rmap->n,*idx;
742 
743   PetscFunctionBegin;
744   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
745   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
746   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
747 
748   for (i=0; i<m; i++) {
749     idx    = a->j + a->i[i] ;
750     v      = a->a + a->i[i] ;
751     n      = a->i[i+1] - a->i[i];
752     alpha1 = x[5*i];
753     alpha2 = x[5*i+1];
754     alpha3 = x[5*i+2];
755     alpha4 = x[5*i+3];
756     alpha5 = x[5*i+4];
757     while (n-->0) {
758       y[5*(*idx)]   += alpha1*(*v);
759       y[5*(*idx)+1] += alpha2*(*v);
760       y[5*(*idx)+2] += alpha3*(*v);
761       y[5*(*idx)+3] += alpha4*(*v);
762       y[5*(*idx)+4] += alpha5*(*v);
763       idx++; v++;
764     }
765   }
766   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
767   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
768   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
774 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
775 {
776   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
777   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
778   const PetscScalar *x,*v;
779   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
780   PetscErrorCode    ierr;
781   PetscInt          n,i,jrow,j;
782   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
783 
784   PetscFunctionBegin;
785   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
786   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
787   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
788   idx  = a->j;
789   v    = a->a;
790   ii   = a->i;
791 
792   for (i=0; i<m; i++) {
793     jrow = ii[i];
794     n    = ii[i+1] - jrow;
795     sum1  = 0.0;
796     sum2  = 0.0;
797     sum3  = 0.0;
798     sum4  = 0.0;
799     sum5  = 0.0;
800     for (j=0; j<n; j++) {
801       sum1 += v[jrow]*x[5*idx[jrow]];
802       sum2 += v[jrow]*x[5*idx[jrow]+1];
803       sum3 += v[jrow]*x[5*idx[jrow]+2];
804       sum4 += v[jrow]*x[5*idx[jrow]+3];
805       sum5 += v[jrow]*x[5*idx[jrow]+4];
806       jrow++;
807      }
808     y[5*i]   += sum1;
809     y[5*i+1] += sum2;
810     y[5*i+2] += sum3;
811     y[5*i+3] += sum4;
812     y[5*i+4] += sum5;
813   }
814 
815   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
816   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
817   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
823 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
824 {
825   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
826   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
827   const PetscScalar *x,*v;
828   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
829   PetscErrorCode    ierr;
830   PetscInt          n,i;
831   const PetscInt    m = b->AIJ->rmap->n,*idx;
832 
833   PetscFunctionBegin;
834   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
835   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
836   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
837 
838   for (i=0; i<m; i++) {
839     idx    = a->j + a->i[i] ;
840     v      = a->a + a->i[i] ;
841     n      = a->i[i+1] - a->i[i];
842     alpha1 = x[5*i];
843     alpha2 = x[5*i+1];
844     alpha3 = x[5*i+2];
845     alpha4 = x[5*i+3];
846     alpha5 = x[5*i+4];
847     while (n-->0) {
848       y[5*(*idx)]   += alpha1*(*v);
849       y[5*(*idx)+1] += alpha2*(*v);
850       y[5*(*idx)+2] += alpha3*(*v);
851       y[5*(*idx)+3] += alpha4*(*v);
852       y[5*(*idx)+4] += alpha5*(*v);
853       idx++; v++;
854     }
855   }
856   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
857   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
858   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 /* ------------------------------------------------------------------------------*/
863 #undef __FUNCT__
864 #define __FUNCT__ "MatMult_SeqMAIJ_6"
865 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
866 {
867   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
868   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
869   const PetscScalar *x,*v;
870   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
871   PetscErrorCode    ierr;
872   PetscInt          nonzerorow=0,n,i,jrow,j;
873   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
874 
875   PetscFunctionBegin;
876   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
877   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
878   idx  = a->j;
879   v    = a->a;
880   ii   = a->i;
881 
882   for (i=0; i<m; i++) {
883     jrow = ii[i];
884     n    = ii[i+1] - jrow;
885     sum1  = 0.0;
886     sum2  = 0.0;
887     sum3  = 0.0;
888     sum4  = 0.0;
889     sum5  = 0.0;
890     sum6  = 0.0;
891     nonzerorow += (n>0);
892     for (j=0; j<n; j++) {
893       sum1 += v[jrow]*x[6*idx[jrow]];
894       sum2 += v[jrow]*x[6*idx[jrow]+1];
895       sum3 += v[jrow]*x[6*idx[jrow]+2];
896       sum4 += v[jrow]*x[6*idx[jrow]+3];
897       sum5 += v[jrow]*x[6*idx[jrow]+4];
898       sum6 += v[jrow]*x[6*idx[jrow]+5];
899       jrow++;
900      }
901     y[6*i]   = sum1;
902     y[6*i+1] = sum2;
903     y[6*i+2] = sum3;
904     y[6*i+3] = sum4;
905     y[6*i+4] = sum5;
906     y[6*i+5] = sum6;
907   }
908 
909   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
910   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
911   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
917 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
918 {
919   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
920   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
921   const PetscScalar *x,*v;
922   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
923   PetscErrorCode    ierr;
924   PetscInt          n,i;
925   const PetscInt    m = b->AIJ->rmap->n,*idx;
926 
927   PetscFunctionBegin;
928   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
929   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
930   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
931 
932   for (i=0; i<m; i++) {
933     idx    = a->j + a->i[i] ;
934     v      = a->a + a->i[i] ;
935     n      = a->i[i+1] - a->i[i];
936     alpha1 = x[6*i];
937     alpha2 = x[6*i+1];
938     alpha3 = x[6*i+2];
939     alpha4 = x[6*i+3];
940     alpha5 = x[6*i+4];
941     alpha6 = x[6*i+5];
942     while (n-->0) {
943       y[6*(*idx)]   += alpha1*(*v);
944       y[6*(*idx)+1] += alpha2*(*v);
945       y[6*(*idx)+2] += alpha3*(*v);
946       y[6*(*idx)+3] += alpha4*(*v);
947       y[6*(*idx)+4] += alpha5*(*v);
948       y[6*(*idx)+5] += alpha6*(*v);
949       idx++; v++;
950     }
951   }
952   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
953   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
954   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNCT__
959 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
960 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
961 {
962   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
963   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
964   const PetscScalar *x,*v;
965   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
966   PetscErrorCode    ierr;
967   PetscInt          n,i,jrow,j;
968   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
969 
970   PetscFunctionBegin;
971   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
972   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
973   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
974   idx  = a->j;
975   v    = a->a;
976   ii   = a->i;
977 
978   for (i=0; i<m; i++) {
979     jrow = ii[i];
980     n    = ii[i+1] - jrow;
981     sum1  = 0.0;
982     sum2  = 0.0;
983     sum3  = 0.0;
984     sum4  = 0.0;
985     sum5  = 0.0;
986     sum6  = 0.0;
987     for (j=0; j<n; j++) {
988       sum1 += v[jrow]*x[6*idx[jrow]];
989       sum2 += v[jrow]*x[6*idx[jrow]+1];
990       sum3 += v[jrow]*x[6*idx[jrow]+2];
991       sum4 += v[jrow]*x[6*idx[jrow]+3];
992       sum5 += v[jrow]*x[6*idx[jrow]+4];
993       sum6 += v[jrow]*x[6*idx[jrow]+5];
994       jrow++;
995      }
996     y[6*i]   += sum1;
997     y[6*i+1] += sum2;
998     y[6*i+2] += sum3;
999     y[6*i+3] += sum4;
1000     y[6*i+4] += sum5;
1001     y[6*i+5] += sum6;
1002   }
1003 
1004   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1005   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1006   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1012 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1013 {
1014   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1015   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1016   const PetscScalar *x,*v;
1017   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1018   PetscErrorCode    ierr;
1019   PetscInt          n,i;
1020   const PetscInt    m = b->AIJ->rmap->n,*idx;
1021 
1022   PetscFunctionBegin;
1023   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1024   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1025   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1026 
1027   for (i=0; i<m; i++) {
1028     idx    = a->j + a->i[i] ;
1029     v      = a->a + a->i[i] ;
1030     n      = a->i[i+1] - a->i[i];
1031     alpha1 = x[6*i];
1032     alpha2 = x[6*i+1];
1033     alpha3 = x[6*i+2];
1034     alpha4 = x[6*i+3];
1035     alpha5 = x[6*i+4];
1036     alpha6 = x[6*i+5];
1037     while (n-->0) {
1038       y[6*(*idx)]   += alpha1*(*v);
1039       y[6*(*idx)+1] += alpha2*(*v);
1040       y[6*(*idx)+2] += alpha3*(*v);
1041       y[6*(*idx)+3] += alpha4*(*v);
1042       y[6*(*idx)+4] += alpha5*(*v);
1043       y[6*(*idx)+5] += alpha6*(*v);
1044       idx++; v++;
1045     }
1046   }
1047   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1048   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1049   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 /* ------------------------------------------------------------------------------*/
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1056 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1057 {
1058   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1059   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1060   const PetscScalar *x,*v;
1061   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1062   PetscErrorCode    ierr;
1063   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1064   PetscInt          nonzerorow=0,n,i,jrow,j;
1065 
1066   PetscFunctionBegin;
1067   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1068   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1069   idx  = a->j;
1070   v    = a->a;
1071   ii   = a->i;
1072 
1073   for (i=0; i<m; i++) {
1074     jrow = ii[i];
1075     n    = ii[i+1] - jrow;
1076     sum1  = 0.0;
1077     sum2  = 0.0;
1078     sum3  = 0.0;
1079     sum4  = 0.0;
1080     sum5  = 0.0;
1081     sum6  = 0.0;
1082     sum7  = 0.0;
1083     nonzerorow += (n>0);
1084     for (j=0; j<n; j++) {
1085       sum1 += v[jrow]*x[7*idx[jrow]];
1086       sum2 += v[jrow]*x[7*idx[jrow]+1];
1087       sum3 += v[jrow]*x[7*idx[jrow]+2];
1088       sum4 += v[jrow]*x[7*idx[jrow]+3];
1089       sum5 += v[jrow]*x[7*idx[jrow]+4];
1090       sum6 += v[jrow]*x[7*idx[jrow]+5];
1091       sum7 += v[jrow]*x[7*idx[jrow]+6];
1092       jrow++;
1093      }
1094     y[7*i]   = sum1;
1095     y[7*i+1] = sum2;
1096     y[7*i+2] = sum3;
1097     y[7*i+3] = sum4;
1098     y[7*i+4] = sum5;
1099     y[7*i+5] = sum6;
1100     y[7*i+6] = sum7;
1101   }
1102 
1103   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1104   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1105   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1111 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1112 {
1113   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1114   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1115   const PetscScalar *x,*v;
1116   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1117   PetscErrorCode    ierr;
1118   const PetscInt    m = b->AIJ->rmap->n,*idx;
1119   PetscInt          n,i;
1120 
1121   PetscFunctionBegin;
1122   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1123   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1124   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1125 
1126   for (i=0; i<m; i++) {
1127     idx    = a->j + a->i[i] ;
1128     v      = a->a + a->i[i] ;
1129     n      = a->i[i+1] - a->i[i];
1130     alpha1 = x[7*i];
1131     alpha2 = x[7*i+1];
1132     alpha3 = x[7*i+2];
1133     alpha4 = x[7*i+3];
1134     alpha5 = x[7*i+4];
1135     alpha6 = x[7*i+5];
1136     alpha7 = x[7*i+6];
1137     while (n-->0) {
1138       y[7*(*idx)]   += alpha1*(*v);
1139       y[7*(*idx)+1] += alpha2*(*v);
1140       y[7*(*idx)+2] += alpha3*(*v);
1141       y[7*(*idx)+3] += alpha4*(*v);
1142       y[7*(*idx)+4] += alpha5*(*v);
1143       y[7*(*idx)+5] += alpha6*(*v);
1144       y[7*(*idx)+6] += alpha7*(*v);
1145       idx++; v++;
1146     }
1147   }
1148   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1149   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1150   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1156 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1157 {
1158   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1159   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1160   const PetscScalar *x,*v;
1161   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1162   PetscErrorCode    ierr;
1163   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1164   PetscInt          n,i,jrow,j;
1165 
1166   PetscFunctionBegin;
1167   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1168   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1169   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1170   idx  = a->j;
1171   v    = a->a;
1172   ii   = a->i;
1173 
1174   for (i=0; i<m; i++) {
1175     jrow = ii[i];
1176     n    = ii[i+1] - jrow;
1177     sum1  = 0.0;
1178     sum2  = 0.0;
1179     sum3  = 0.0;
1180     sum4  = 0.0;
1181     sum5  = 0.0;
1182     sum6  = 0.0;
1183     sum7  = 0.0;
1184     for (j=0; j<n; j++) {
1185       sum1 += v[jrow]*x[7*idx[jrow]];
1186       sum2 += v[jrow]*x[7*idx[jrow]+1];
1187       sum3 += v[jrow]*x[7*idx[jrow]+2];
1188       sum4 += v[jrow]*x[7*idx[jrow]+3];
1189       sum5 += v[jrow]*x[7*idx[jrow]+4];
1190       sum6 += v[jrow]*x[7*idx[jrow]+5];
1191       sum7 += v[jrow]*x[7*idx[jrow]+6];
1192       jrow++;
1193      }
1194     y[7*i]   += sum1;
1195     y[7*i+1] += sum2;
1196     y[7*i+2] += sum3;
1197     y[7*i+3] += sum4;
1198     y[7*i+4] += sum5;
1199     y[7*i+5] += sum6;
1200     y[7*i+6] += sum7;
1201   }
1202 
1203   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1204   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1205   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1211 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1212 {
1213   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1214   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1215   const PetscScalar *x,*v;
1216   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1217   PetscErrorCode    ierr;
1218   const PetscInt    m = b->AIJ->rmap->n,*idx;
1219   PetscInt          n,i;
1220 
1221   PetscFunctionBegin;
1222   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1223   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1224   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1225   for (i=0; i<m; i++) {
1226     idx    = a->j + a->i[i] ;
1227     v      = a->a + a->i[i] ;
1228     n      = a->i[i+1] - a->i[i];
1229     alpha1 = x[7*i];
1230     alpha2 = x[7*i+1];
1231     alpha3 = x[7*i+2];
1232     alpha4 = x[7*i+3];
1233     alpha5 = x[7*i+4];
1234     alpha6 = x[7*i+5];
1235     alpha7 = x[7*i+6];
1236     while (n-->0) {
1237       y[7*(*idx)]   += alpha1*(*v);
1238       y[7*(*idx)+1] += alpha2*(*v);
1239       y[7*(*idx)+2] += alpha3*(*v);
1240       y[7*(*idx)+3] += alpha4*(*v);
1241       y[7*(*idx)+4] += alpha5*(*v);
1242       y[7*(*idx)+5] += alpha6*(*v);
1243       y[7*(*idx)+6] += alpha7*(*v);
1244       idx++; v++;
1245     }
1246   }
1247   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1248   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1249   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1255 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1256 {
1257   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1258   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1259   const PetscScalar *x,*v;
1260   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1261   PetscErrorCode    ierr;
1262   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1263   PetscInt          nonzerorow=0,n,i,jrow,j;
1264 
1265   PetscFunctionBegin;
1266   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1267   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1268   idx  = a->j;
1269   v    = a->a;
1270   ii   = a->i;
1271 
1272   for (i=0; i<m; i++) {
1273     jrow = ii[i];
1274     n    = ii[i+1] - jrow;
1275     sum1  = 0.0;
1276     sum2  = 0.0;
1277     sum3  = 0.0;
1278     sum4  = 0.0;
1279     sum5  = 0.0;
1280     sum6  = 0.0;
1281     sum7  = 0.0;
1282     sum8  = 0.0;
1283     nonzerorow += (n>0);
1284     for (j=0; j<n; j++) {
1285       sum1 += v[jrow]*x[8*idx[jrow]];
1286       sum2 += v[jrow]*x[8*idx[jrow]+1];
1287       sum3 += v[jrow]*x[8*idx[jrow]+2];
1288       sum4 += v[jrow]*x[8*idx[jrow]+3];
1289       sum5 += v[jrow]*x[8*idx[jrow]+4];
1290       sum6 += v[jrow]*x[8*idx[jrow]+5];
1291       sum7 += v[jrow]*x[8*idx[jrow]+6];
1292       sum8 += v[jrow]*x[8*idx[jrow]+7];
1293       jrow++;
1294      }
1295     y[8*i]   = sum1;
1296     y[8*i+1] = sum2;
1297     y[8*i+2] = sum3;
1298     y[8*i+3] = sum4;
1299     y[8*i+4] = sum5;
1300     y[8*i+5] = sum6;
1301     y[8*i+6] = sum7;
1302     y[8*i+7] = sum8;
1303   }
1304 
1305   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1306   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1307   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1313 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1314 {
1315   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1316   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1317   const PetscScalar *x,*v;
1318   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1319   PetscErrorCode    ierr;
1320   const PetscInt    m = b->AIJ->rmap->n,*idx;
1321   PetscInt          n,i;
1322 
1323   PetscFunctionBegin;
1324   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1325   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1326   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1327 
1328   for (i=0; i<m; i++) {
1329     idx    = a->j + a->i[i] ;
1330     v      = a->a + a->i[i] ;
1331     n      = a->i[i+1] - a->i[i];
1332     alpha1 = x[8*i];
1333     alpha2 = x[8*i+1];
1334     alpha3 = x[8*i+2];
1335     alpha4 = x[8*i+3];
1336     alpha5 = x[8*i+4];
1337     alpha6 = x[8*i+5];
1338     alpha7 = x[8*i+6];
1339     alpha8 = x[8*i+7];
1340     while (n-->0) {
1341       y[8*(*idx)]   += alpha1*(*v);
1342       y[8*(*idx)+1] += alpha2*(*v);
1343       y[8*(*idx)+2] += alpha3*(*v);
1344       y[8*(*idx)+3] += alpha4*(*v);
1345       y[8*(*idx)+4] += alpha5*(*v);
1346       y[8*(*idx)+5] += alpha6*(*v);
1347       y[8*(*idx)+6] += alpha7*(*v);
1348       y[8*(*idx)+7] += alpha8*(*v);
1349       idx++; v++;
1350     }
1351   }
1352   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1353   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1354   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1360 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1361 {
1362   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1363   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1364   const PetscScalar *x,*v;
1365   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1366   PetscErrorCode    ierr;
1367   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1368   PetscInt          n,i,jrow,j;
1369 
1370   PetscFunctionBegin;
1371   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1372   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1373   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1374   idx  = a->j;
1375   v    = a->a;
1376   ii   = a->i;
1377 
1378   for (i=0; i<m; i++) {
1379     jrow = ii[i];
1380     n    = ii[i+1] - jrow;
1381     sum1  = 0.0;
1382     sum2  = 0.0;
1383     sum3  = 0.0;
1384     sum4  = 0.0;
1385     sum5  = 0.0;
1386     sum6  = 0.0;
1387     sum7  = 0.0;
1388     sum8  = 0.0;
1389     for (j=0; j<n; j++) {
1390       sum1 += v[jrow]*x[8*idx[jrow]];
1391       sum2 += v[jrow]*x[8*idx[jrow]+1];
1392       sum3 += v[jrow]*x[8*idx[jrow]+2];
1393       sum4 += v[jrow]*x[8*idx[jrow]+3];
1394       sum5 += v[jrow]*x[8*idx[jrow]+4];
1395       sum6 += v[jrow]*x[8*idx[jrow]+5];
1396       sum7 += v[jrow]*x[8*idx[jrow]+6];
1397       sum8 += v[jrow]*x[8*idx[jrow]+7];
1398       jrow++;
1399      }
1400     y[8*i]   += sum1;
1401     y[8*i+1] += sum2;
1402     y[8*i+2] += sum3;
1403     y[8*i+3] += sum4;
1404     y[8*i+4] += sum5;
1405     y[8*i+5] += sum6;
1406     y[8*i+6] += sum7;
1407     y[8*i+7] += sum8;
1408   }
1409 
1410   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1411   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1412   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1418 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1419 {
1420   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1421   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1422   const PetscScalar *x,*v;
1423   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1424   PetscErrorCode    ierr;
1425   const PetscInt    m = b->AIJ->rmap->n,*idx;
1426   PetscInt          n,i;
1427 
1428   PetscFunctionBegin;
1429   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1430   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1431   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1432   for (i=0; i<m; i++) {
1433     idx    = a->j + a->i[i] ;
1434     v      = a->a + a->i[i] ;
1435     n      = a->i[i+1] - a->i[i];
1436     alpha1 = x[8*i];
1437     alpha2 = x[8*i+1];
1438     alpha3 = x[8*i+2];
1439     alpha4 = x[8*i+3];
1440     alpha5 = x[8*i+4];
1441     alpha6 = x[8*i+5];
1442     alpha7 = x[8*i+6];
1443     alpha8 = x[8*i+7];
1444     while (n-->0) {
1445       y[8*(*idx)]   += alpha1*(*v);
1446       y[8*(*idx)+1] += alpha2*(*v);
1447       y[8*(*idx)+2] += alpha3*(*v);
1448       y[8*(*idx)+3] += alpha4*(*v);
1449       y[8*(*idx)+4] += alpha5*(*v);
1450       y[8*(*idx)+5] += alpha6*(*v);
1451       y[8*(*idx)+6] += alpha7*(*v);
1452       y[8*(*idx)+7] += alpha8*(*v);
1453       idx++; v++;
1454     }
1455   }
1456   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1457   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1458   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 /* ------------------------------------------------------------------------------*/
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1465 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1466 {
1467   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1468   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1469   const PetscScalar *x,*v;
1470   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1471   PetscErrorCode    ierr;
1472   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1473   PetscInt          nonzerorow=0,n,i,jrow,j;
1474 
1475   PetscFunctionBegin;
1476   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1477   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1478   idx  = a->j;
1479   v    = a->a;
1480   ii   = a->i;
1481 
1482   for (i=0; i<m; i++) {
1483     jrow = ii[i];
1484     n    = ii[i+1] - jrow;
1485     sum1  = 0.0;
1486     sum2  = 0.0;
1487     sum3  = 0.0;
1488     sum4  = 0.0;
1489     sum5  = 0.0;
1490     sum6  = 0.0;
1491     sum7  = 0.0;
1492     sum8  = 0.0;
1493     sum9  = 0.0;
1494     nonzerorow += (n>0);
1495     for (j=0; j<n; j++) {
1496       sum1 += v[jrow]*x[9*idx[jrow]];
1497       sum2 += v[jrow]*x[9*idx[jrow]+1];
1498       sum3 += v[jrow]*x[9*idx[jrow]+2];
1499       sum4 += v[jrow]*x[9*idx[jrow]+3];
1500       sum5 += v[jrow]*x[9*idx[jrow]+4];
1501       sum6 += v[jrow]*x[9*idx[jrow]+5];
1502       sum7 += v[jrow]*x[9*idx[jrow]+6];
1503       sum8 += v[jrow]*x[9*idx[jrow]+7];
1504       sum9 += v[jrow]*x[9*idx[jrow]+8];
1505       jrow++;
1506      }
1507     y[9*i]   = sum1;
1508     y[9*i+1] = sum2;
1509     y[9*i+2] = sum3;
1510     y[9*i+3] = sum4;
1511     y[9*i+4] = sum5;
1512     y[9*i+5] = sum6;
1513     y[9*i+6] = sum7;
1514     y[9*i+7] = sum8;
1515     y[9*i+8] = sum9;
1516   }
1517 
1518   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1519   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1520   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 /* ------------------------------------------------------------------------------*/
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1528 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1529 {
1530   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1531   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1532   const PetscScalar *x,*v;
1533   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1534   PetscErrorCode    ierr;
1535   const PetscInt    m = b->AIJ->rmap->n,*idx;
1536   PetscInt          n,i;
1537 
1538   PetscFunctionBegin;
1539   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1540   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1541   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1542 
1543   for (i=0; i<m; i++) {
1544     idx    = a->j + a->i[i] ;
1545     v      = a->a + a->i[i] ;
1546     n      = a->i[i+1] - a->i[i];
1547     alpha1 = x[9*i];
1548     alpha2 = x[9*i+1];
1549     alpha3 = x[9*i+2];
1550     alpha4 = x[9*i+3];
1551     alpha5 = x[9*i+4];
1552     alpha6 = x[9*i+5];
1553     alpha7 = x[9*i+6];
1554     alpha8 = x[9*i+7];
1555     alpha9 = x[9*i+8];
1556     while (n-->0) {
1557       y[9*(*idx)]   += alpha1*(*v);
1558       y[9*(*idx)+1] += alpha2*(*v);
1559       y[9*(*idx)+2] += alpha3*(*v);
1560       y[9*(*idx)+3] += alpha4*(*v);
1561       y[9*(*idx)+4] += alpha5*(*v);
1562       y[9*(*idx)+5] += alpha6*(*v);
1563       y[9*(*idx)+6] += alpha7*(*v);
1564       y[9*(*idx)+7] += alpha8*(*v);
1565       y[9*(*idx)+8] += alpha9*(*v);
1566       idx++; v++;
1567     }
1568   }
1569   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1570   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1571   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1577 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1578 {
1579   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1580   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1581   const PetscScalar *x,*v;
1582   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1583   PetscErrorCode    ierr;
1584   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1585   PetscInt          n,i,jrow,j;
1586 
1587   PetscFunctionBegin;
1588   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1589   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1590   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1591   idx  = a->j;
1592   v    = a->a;
1593   ii   = a->i;
1594 
1595   for (i=0; i<m; i++) {
1596     jrow = ii[i];
1597     n    = ii[i+1] - jrow;
1598     sum1  = 0.0;
1599     sum2  = 0.0;
1600     sum3  = 0.0;
1601     sum4  = 0.0;
1602     sum5  = 0.0;
1603     sum6  = 0.0;
1604     sum7  = 0.0;
1605     sum8  = 0.0;
1606     sum9  = 0.0;
1607     for (j=0; j<n; j++) {
1608       sum1 += v[jrow]*x[9*idx[jrow]];
1609       sum2 += v[jrow]*x[9*idx[jrow]+1];
1610       sum3 += v[jrow]*x[9*idx[jrow]+2];
1611       sum4 += v[jrow]*x[9*idx[jrow]+3];
1612       sum5 += v[jrow]*x[9*idx[jrow]+4];
1613       sum6 += v[jrow]*x[9*idx[jrow]+5];
1614       sum7 += v[jrow]*x[9*idx[jrow]+6];
1615       sum8 += v[jrow]*x[9*idx[jrow]+7];
1616       sum9 += v[jrow]*x[9*idx[jrow]+8];
1617       jrow++;
1618      }
1619     y[9*i]   += sum1;
1620     y[9*i+1] += sum2;
1621     y[9*i+2] += sum3;
1622     y[9*i+3] += sum4;
1623     y[9*i+4] += sum5;
1624     y[9*i+5] += sum6;
1625     y[9*i+6] += sum7;
1626     y[9*i+7] += sum8;
1627     y[9*i+8] += sum9;
1628   }
1629 
1630   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1631   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1632   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1638 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1639 {
1640   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1641   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1642   const PetscScalar *x,*v;
1643   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1644   PetscErrorCode    ierr;
1645   const PetscInt    m = b->AIJ->rmap->n,*idx;
1646   PetscInt          n,i;
1647 
1648   PetscFunctionBegin;
1649   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1650   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1651   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1652   for (i=0; i<m; i++) {
1653     idx    = a->j + a->i[i] ;
1654     v      = a->a + a->i[i] ;
1655     n      = a->i[i+1] - a->i[i];
1656     alpha1 = x[9*i];
1657     alpha2 = x[9*i+1];
1658     alpha3 = x[9*i+2];
1659     alpha4 = x[9*i+3];
1660     alpha5 = x[9*i+4];
1661     alpha6 = x[9*i+5];
1662     alpha7 = x[9*i+6];
1663     alpha8 = x[9*i+7];
1664     alpha9 = x[9*i+8];
1665     while (n-->0) {
1666       y[9*(*idx)]   += alpha1*(*v);
1667       y[9*(*idx)+1] += alpha2*(*v);
1668       y[9*(*idx)+2] += alpha3*(*v);
1669       y[9*(*idx)+3] += alpha4*(*v);
1670       y[9*(*idx)+4] += alpha5*(*v);
1671       y[9*(*idx)+5] += alpha6*(*v);
1672       y[9*(*idx)+6] += alpha7*(*v);
1673       y[9*(*idx)+7] += alpha8*(*v);
1674       y[9*(*idx)+8] += alpha9*(*v);
1675       idx++; v++;
1676     }
1677   }
1678   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1679   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1680   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1685 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1686 {
1687   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1688   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1689   const PetscScalar *x,*v;
1690   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1691   PetscErrorCode    ierr;
1692   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1693   PetscInt          nonzerorow=0,n,i,jrow,j;
1694 
1695   PetscFunctionBegin;
1696   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1697   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1698   idx  = a->j;
1699   v    = a->a;
1700   ii   = a->i;
1701 
1702   for (i=0; i<m; i++) {
1703     jrow = ii[i];
1704     n    = ii[i+1] - jrow;
1705     sum1  = 0.0;
1706     sum2  = 0.0;
1707     sum3  = 0.0;
1708     sum4  = 0.0;
1709     sum5  = 0.0;
1710     sum6  = 0.0;
1711     sum7  = 0.0;
1712     sum8  = 0.0;
1713     sum9  = 0.0;
1714     sum10 = 0.0;
1715     nonzerorow += (n>0);
1716     for (j=0; j<n; j++) {
1717       sum1  += v[jrow]*x[10*idx[jrow]];
1718       sum2  += v[jrow]*x[10*idx[jrow]+1];
1719       sum3  += v[jrow]*x[10*idx[jrow]+2];
1720       sum4  += v[jrow]*x[10*idx[jrow]+3];
1721       sum5  += v[jrow]*x[10*idx[jrow]+4];
1722       sum6  += v[jrow]*x[10*idx[jrow]+5];
1723       sum7  += v[jrow]*x[10*idx[jrow]+6];
1724       sum8  += v[jrow]*x[10*idx[jrow]+7];
1725       sum9  += v[jrow]*x[10*idx[jrow]+8];
1726       sum10 += v[jrow]*x[10*idx[jrow]+9];
1727       jrow++;
1728      }
1729     y[10*i]   = sum1;
1730     y[10*i+1] = sum2;
1731     y[10*i+2] = sum3;
1732     y[10*i+3] = sum4;
1733     y[10*i+4] = sum5;
1734     y[10*i+5] = sum6;
1735     y[10*i+6] = sum7;
1736     y[10*i+7] = sum8;
1737     y[10*i+8] = sum9;
1738     y[10*i+9] = sum10;
1739   }
1740 
1741   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1742   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1743   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1749 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1750 {
1751   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1752   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1753   const PetscScalar *x,*v;
1754   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1755   PetscErrorCode    ierr;
1756   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1757   PetscInt          n,i,jrow,j;
1758 
1759   PetscFunctionBegin;
1760   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1761   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1762   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1763   idx  = a->j;
1764   v    = a->a;
1765   ii   = a->i;
1766 
1767   for (i=0; i<m; i++) {
1768     jrow = ii[i];
1769     n    = ii[i+1] - jrow;
1770     sum1  = 0.0;
1771     sum2  = 0.0;
1772     sum3  = 0.0;
1773     sum4  = 0.0;
1774     sum5  = 0.0;
1775     sum6  = 0.0;
1776     sum7  = 0.0;
1777     sum8  = 0.0;
1778     sum9  = 0.0;
1779     sum10 = 0.0;
1780     for (j=0; j<n; j++) {
1781       sum1  += v[jrow]*x[10*idx[jrow]];
1782       sum2  += v[jrow]*x[10*idx[jrow]+1];
1783       sum3  += v[jrow]*x[10*idx[jrow]+2];
1784       sum4  += v[jrow]*x[10*idx[jrow]+3];
1785       sum5  += v[jrow]*x[10*idx[jrow]+4];
1786       sum6  += v[jrow]*x[10*idx[jrow]+5];
1787       sum7  += v[jrow]*x[10*idx[jrow]+6];
1788       sum8  += v[jrow]*x[10*idx[jrow]+7];
1789       sum9  += v[jrow]*x[10*idx[jrow]+8];
1790       sum10 += v[jrow]*x[10*idx[jrow]+9];
1791       jrow++;
1792      }
1793     y[10*i]   += sum1;
1794     y[10*i+1] += sum2;
1795     y[10*i+2] += sum3;
1796     y[10*i+3] += sum4;
1797     y[10*i+4] += sum5;
1798     y[10*i+5] += sum6;
1799     y[10*i+6] += sum7;
1800     y[10*i+7] += sum8;
1801     y[10*i+8] += sum9;
1802     y[10*i+9] += sum10;
1803   }
1804 
1805   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1806   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1807   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1813 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1814 {
1815   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1816   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1817   const PetscScalar *x,*v;
1818   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1819   PetscErrorCode    ierr;
1820   const PetscInt    m = b->AIJ->rmap->n,*idx;
1821   PetscInt          n,i;
1822 
1823   PetscFunctionBegin;
1824   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1825   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1826   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1827 
1828   for (i=0; i<m; i++) {
1829     idx    = a->j + a->i[i] ;
1830     v      = a->a + a->i[i] ;
1831     n      = a->i[i+1] - a->i[i];
1832     alpha1 = x[10*i];
1833     alpha2 = x[10*i+1];
1834     alpha3 = x[10*i+2];
1835     alpha4 = x[10*i+3];
1836     alpha5 = x[10*i+4];
1837     alpha6 = x[10*i+5];
1838     alpha7 = x[10*i+6];
1839     alpha8 = x[10*i+7];
1840     alpha9 = x[10*i+8];
1841     alpha10 = x[10*i+9];
1842     while (n-->0) {
1843       y[10*(*idx)]   += alpha1*(*v);
1844       y[10*(*idx)+1] += alpha2*(*v);
1845       y[10*(*idx)+2] += alpha3*(*v);
1846       y[10*(*idx)+3] += alpha4*(*v);
1847       y[10*(*idx)+4] += alpha5*(*v);
1848       y[10*(*idx)+5] += alpha6*(*v);
1849       y[10*(*idx)+6] += alpha7*(*v);
1850       y[10*(*idx)+7] += alpha8*(*v);
1851       y[10*(*idx)+8] += alpha9*(*v);
1852       y[10*(*idx)+9] += alpha10*(*v);
1853       idx++; v++;
1854     }
1855   }
1856   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1857   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1858   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1864 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1865 {
1866   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1867   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1868   const PetscScalar *x,*v;
1869   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1870   PetscErrorCode    ierr;
1871   const PetscInt    m = b->AIJ->rmap->n,*idx;
1872   PetscInt          n,i;
1873 
1874   PetscFunctionBegin;
1875   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1876   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1877   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1878   for (i=0; i<m; i++) {
1879     idx    = a->j + a->i[i] ;
1880     v      = a->a + a->i[i] ;
1881     n      = a->i[i+1] - a->i[i];
1882     alpha1 = x[10*i];
1883     alpha2 = x[10*i+1];
1884     alpha3 = x[10*i+2];
1885     alpha4 = x[10*i+3];
1886     alpha5 = x[10*i+4];
1887     alpha6 = x[10*i+5];
1888     alpha7 = x[10*i+6];
1889     alpha8 = x[10*i+7];
1890     alpha9 = x[10*i+8];
1891     alpha10 = x[10*i+9];
1892     while (n-->0) {
1893       y[10*(*idx)]   += alpha1*(*v);
1894       y[10*(*idx)+1] += alpha2*(*v);
1895       y[10*(*idx)+2] += alpha3*(*v);
1896       y[10*(*idx)+3] += alpha4*(*v);
1897       y[10*(*idx)+4] += alpha5*(*v);
1898       y[10*(*idx)+5] += alpha6*(*v);
1899       y[10*(*idx)+6] += alpha7*(*v);
1900       y[10*(*idx)+7] += alpha8*(*v);
1901       y[10*(*idx)+8] += alpha9*(*v);
1902       y[10*(*idx)+9] += alpha10*(*v);
1903       idx++; v++;
1904     }
1905   }
1906   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1907   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1908   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 
1913 /*--------------------------------------------------------------------------------------------*/
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatMult_SeqMAIJ_11"
1916 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1917 {
1918   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1919   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1920   const PetscScalar *x,*v;
1921   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1922   PetscErrorCode    ierr;
1923   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1924   PetscInt          nonzerorow=0,n,i,jrow,j;
1925 
1926   PetscFunctionBegin;
1927   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1928   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1929   idx  = a->j;
1930   v    = a->a;
1931   ii   = a->i;
1932 
1933   for (i=0; i<m; i++) {
1934     jrow = ii[i];
1935     n    = ii[i+1] - jrow;
1936     sum1  = 0.0;
1937     sum2  = 0.0;
1938     sum3  = 0.0;
1939     sum4  = 0.0;
1940     sum5  = 0.0;
1941     sum6  = 0.0;
1942     sum7  = 0.0;
1943     sum8  = 0.0;
1944     sum9  = 0.0;
1945     sum10 = 0.0;
1946     sum11 = 0.0;
1947     nonzerorow += (n>0);
1948     for (j=0; j<n; j++) {
1949       sum1  += v[jrow]*x[11*idx[jrow]];
1950       sum2  += v[jrow]*x[11*idx[jrow]+1];
1951       sum3  += v[jrow]*x[11*idx[jrow]+2];
1952       sum4  += v[jrow]*x[11*idx[jrow]+3];
1953       sum5  += v[jrow]*x[11*idx[jrow]+4];
1954       sum6  += v[jrow]*x[11*idx[jrow]+5];
1955       sum7  += v[jrow]*x[11*idx[jrow]+6];
1956       sum8  += v[jrow]*x[11*idx[jrow]+7];
1957       sum9  += v[jrow]*x[11*idx[jrow]+8];
1958       sum10 += v[jrow]*x[11*idx[jrow]+9];
1959       sum11 += v[jrow]*x[11*idx[jrow]+10];
1960       jrow++;
1961      }
1962     y[11*i]   = sum1;
1963     y[11*i+1] = sum2;
1964     y[11*i+2] = sum3;
1965     y[11*i+3] = sum4;
1966     y[11*i+4] = sum5;
1967     y[11*i+5] = sum6;
1968     y[11*i+6] = sum7;
1969     y[11*i+7] = sum8;
1970     y[11*i+8] = sum9;
1971     y[11*i+9] = sum10;
1972     y[11*i+10] = sum11;
1973   }
1974 
1975   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1976   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1977   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1983 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1984 {
1985   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1986   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1987   const PetscScalar *x,*v;
1988   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1989   PetscErrorCode    ierr;
1990   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1991   PetscInt          n,i,jrow,j;
1992 
1993   PetscFunctionBegin;
1994   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1995   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1996   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1997   idx  = a->j;
1998   v    = a->a;
1999   ii   = a->i;
2000 
2001   for (i=0; i<m; i++) {
2002     jrow = ii[i];
2003     n    = ii[i+1] - jrow;
2004     sum1  = 0.0;
2005     sum2  = 0.0;
2006     sum3  = 0.0;
2007     sum4  = 0.0;
2008     sum5  = 0.0;
2009     sum6  = 0.0;
2010     sum7  = 0.0;
2011     sum8  = 0.0;
2012     sum9  = 0.0;
2013     sum10 = 0.0;
2014     sum11 = 0.0;
2015     for (j=0; j<n; j++) {
2016       sum1  += v[jrow]*x[11*idx[jrow]];
2017       sum2  += v[jrow]*x[11*idx[jrow]+1];
2018       sum3  += v[jrow]*x[11*idx[jrow]+2];
2019       sum4  += v[jrow]*x[11*idx[jrow]+3];
2020       sum5  += v[jrow]*x[11*idx[jrow]+4];
2021       sum6  += v[jrow]*x[11*idx[jrow]+5];
2022       sum7  += v[jrow]*x[11*idx[jrow]+6];
2023       sum8  += v[jrow]*x[11*idx[jrow]+7];
2024       sum9  += v[jrow]*x[11*idx[jrow]+8];
2025       sum10 += v[jrow]*x[11*idx[jrow]+9];
2026       sum11 += v[jrow]*x[11*idx[jrow]+10];
2027       jrow++;
2028      }
2029     y[11*i]   += sum1;
2030     y[11*i+1] += sum2;
2031     y[11*i+2] += sum3;
2032     y[11*i+3] += sum4;
2033     y[11*i+4] += sum5;
2034     y[11*i+5] += sum6;
2035     y[11*i+6] += sum7;
2036     y[11*i+7] += sum8;
2037     y[11*i+8] += sum9;
2038     y[11*i+9] += sum10;
2039     y[11*i+10] += sum11;
2040   }
2041 
2042   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2043   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2044   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2050 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2051 {
2052   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2053   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2054   const PetscScalar *x,*v;
2055   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2056   PetscErrorCode    ierr;
2057   const PetscInt    m = b->AIJ->rmap->n,*idx;
2058   PetscInt          n,i;
2059 
2060   PetscFunctionBegin;
2061   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2062   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2063   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2064 
2065   for (i=0; i<m; i++) {
2066     idx    = a->j + a->i[i] ;
2067     v      = a->a + a->i[i] ;
2068     n      = a->i[i+1] - a->i[i];
2069     alpha1 = x[11*i];
2070     alpha2 = x[11*i+1];
2071     alpha3 = x[11*i+2];
2072     alpha4 = x[11*i+3];
2073     alpha5 = x[11*i+4];
2074     alpha6 = x[11*i+5];
2075     alpha7 = x[11*i+6];
2076     alpha8 = x[11*i+7];
2077     alpha9 = x[11*i+8];
2078     alpha10 = x[11*i+9];
2079     alpha11 = x[11*i+10];
2080     while (n-->0) {
2081       y[11*(*idx)]   += alpha1*(*v);
2082       y[11*(*idx)+1] += alpha2*(*v);
2083       y[11*(*idx)+2] += alpha3*(*v);
2084       y[11*(*idx)+3] += alpha4*(*v);
2085       y[11*(*idx)+4] += alpha5*(*v);
2086       y[11*(*idx)+5] += alpha6*(*v);
2087       y[11*(*idx)+6] += alpha7*(*v);
2088       y[11*(*idx)+7] += alpha8*(*v);
2089       y[11*(*idx)+8] += alpha9*(*v);
2090       y[11*(*idx)+9] += alpha10*(*v);
2091       y[11*(*idx)+10] += alpha11*(*v);
2092       idx++; v++;
2093     }
2094   }
2095   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2096   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2097   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2103 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2104 {
2105   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2106   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2107   const PetscScalar *x,*v;
2108   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2109   PetscErrorCode    ierr;
2110   const PetscInt    m = b->AIJ->rmap->n,*idx;
2111   PetscInt          n,i;
2112 
2113   PetscFunctionBegin;
2114   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2115   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2116   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2117   for (i=0; i<m; i++) {
2118     idx    = a->j + a->i[i] ;
2119     v      = a->a + a->i[i] ;
2120     n      = a->i[i+1] - a->i[i];
2121     alpha1 = x[11*i];
2122     alpha2 = x[11*i+1];
2123     alpha3 = x[11*i+2];
2124     alpha4 = x[11*i+3];
2125     alpha5 = x[11*i+4];
2126     alpha6 = x[11*i+5];
2127     alpha7 = x[11*i+6];
2128     alpha8 = x[11*i+7];
2129     alpha9 = x[11*i+8];
2130     alpha10 = x[11*i+9];
2131     alpha11 = x[11*i+10];
2132     while (n-->0) {
2133       y[11*(*idx)]   += alpha1*(*v);
2134       y[11*(*idx)+1] += alpha2*(*v);
2135       y[11*(*idx)+2] += alpha3*(*v);
2136       y[11*(*idx)+3] += alpha4*(*v);
2137       y[11*(*idx)+4] += alpha5*(*v);
2138       y[11*(*idx)+5] += alpha6*(*v);
2139       y[11*(*idx)+6] += alpha7*(*v);
2140       y[11*(*idx)+7] += alpha8*(*v);
2141       y[11*(*idx)+8] += alpha9*(*v);
2142       y[11*(*idx)+9] += alpha10*(*v);
2143       y[11*(*idx)+10] += alpha11*(*v);
2144       idx++; v++;
2145     }
2146   }
2147   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2148   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2149   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 
2154 /*--------------------------------------------------------------------------------------------*/
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatMult_SeqMAIJ_16"
2157 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2158 {
2159   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2160   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2161   const PetscScalar *x,*v;
2162   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2163   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2164   PetscErrorCode     ierr;
2165   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2166   PetscInt           nonzerorow=0,n,i,jrow,j;
2167 
2168   PetscFunctionBegin;
2169   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2170   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2171   idx  = a->j;
2172   v    = a->a;
2173   ii   = a->i;
2174 
2175   for (i=0; i<m; i++) {
2176     jrow = ii[i];
2177     n    = ii[i+1] - jrow;
2178     sum1  = 0.0;
2179     sum2  = 0.0;
2180     sum3  = 0.0;
2181     sum4  = 0.0;
2182     sum5  = 0.0;
2183     sum6  = 0.0;
2184     sum7  = 0.0;
2185     sum8  = 0.0;
2186     sum9  = 0.0;
2187     sum10 = 0.0;
2188     sum11 = 0.0;
2189     sum12 = 0.0;
2190     sum13 = 0.0;
2191     sum14 = 0.0;
2192     sum15 = 0.0;
2193     sum16 = 0.0;
2194     nonzerorow += (n>0);
2195     for (j=0; j<n; j++) {
2196       sum1  += v[jrow]*x[16*idx[jrow]];
2197       sum2  += v[jrow]*x[16*idx[jrow]+1];
2198       sum3  += v[jrow]*x[16*idx[jrow]+2];
2199       sum4  += v[jrow]*x[16*idx[jrow]+3];
2200       sum5  += v[jrow]*x[16*idx[jrow]+4];
2201       sum6  += v[jrow]*x[16*idx[jrow]+5];
2202       sum7  += v[jrow]*x[16*idx[jrow]+6];
2203       sum8  += v[jrow]*x[16*idx[jrow]+7];
2204       sum9  += v[jrow]*x[16*idx[jrow]+8];
2205       sum10 += v[jrow]*x[16*idx[jrow]+9];
2206       sum11 += v[jrow]*x[16*idx[jrow]+10];
2207       sum12 += v[jrow]*x[16*idx[jrow]+11];
2208       sum13 += v[jrow]*x[16*idx[jrow]+12];
2209       sum14 += v[jrow]*x[16*idx[jrow]+13];
2210       sum15 += v[jrow]*x[16*idx[jrow]+14];
2211       sum16 += v[jrow]*x[16*idx[jrow]+15];
2212       jrow++;
2213      }
2214     y[16*i]    = sum1;
2215     y[16*i+1]  = sum2;
2216     y[16*i+2]  = sum3;
2217     y[16*i+3]  = sum4;
2218     y[16*i+4]  = sum5;
2219     y[16*i+5]  = sum6;
2220     y[16*i+6]  = sum7;
2221     y[16*i+7]  = sum8;
2222     y[16*i+8]  = sum9;
2223     y[16*i+9]  = sum10;
2224     y[16*i+10] = sum11;
2225     y[16*i+11] = sum12;
2226     y[16*i+12] = sum13;
2227     y[16*i+13] = sum14;
2228     y[16*i+14] = sum15;
2229     y[16*i+15] = sum16;
2230   }
2231 
2232   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2233   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2234   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2240 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2241 {
2242   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2243   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2244   const PetscScalar *x,*v;
2245   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2246   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2247   PetscErrorCode    ierr;
2248   const PetscInt    m = b->AIJ->rmap->n,*idx;
2249   PetscInt          n,i;
2250 
2251   PetscFunctionBegin;
2252   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2253   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2254   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2255 
2256   for (i=0; i<m; i++) {
2257     idx    = a->j + a->i[i] ;
2258     v      = a->a + a->i[i] ;
2259     n      = a->i[i+1] - a->i[i];
2260     alpha1  = x[16*i];
2261     alpha2  = x[16*i+1];
2262     alpha3  = x[16*i+2];
2263     alpha4  = x[16*i+3];
2264     alpha5  = x[16*i+4];
2265     alpha6  = x[16*i+5];
2266     alpha7  = x[16*i+6];
2267     alpha8  = x[16*i+7];
2268     alpha9  = x[16*i+8];
2269     alpha10 = x[16*i+9];
2270     alpha11 = x[16*i+10];
2271     alpha12 = x[16*i+11];
2272     alpha13 = x[16*i+12];
2273     alpha14 = x[16*i+13];
2274     alpha15 = x[16*i+14];
2275     alpha16 = x[16*i+15];
2276     while (n-->0) {
2277       y[16*(*idx)]    += alpha1*(*v);
2278       y[16*(*idx)+1]  += alpha2*(*v);
2279       y[16*(*idx)+2]  += alpha3*(*v);
2280       y[16*(*idx)+3]  += alpha4*(*v);
2281       y[16*(*idx)+4]  += alpha5*(*v);
2282       y[16*(*idx)+5]  += alpha6*(*v);
2283       y[16*(*idx)+6]  += alpha7*(*v);
2284       y[16*(*idx)+7]  += alpha8*(*v);
2285       y[16*(*idx)+8]  += alpha9*(*v);
2286       y[16*(*idx)+9]  += alpha10*(*v);
2287       y[16*(*idx)+10] += alpha11*(*v);
2288       y[16*(*idx)+11] += alpha12*(*v);
2289       y[16*(*idx)+12] += alpha13*(*v);
2290       y[16*(*idx)+13] += alpha14*(*v);
2291       y[16*(*idx)+14] += alpha15*(*v);
2292       y[16*(*idx)+15] += alpha16*(*v);
2293       idx++; v++;
2294     }
2295   }
2296   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2297   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2304 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2305 {
2306   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2307   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2308   const PetscScalar *x,*v;
2309   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2310   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2311   PetscErrorCode    ierr;
2312   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2313   PetscInt          n,i,jrow,j;
2314 
2315   PetscFunctionBegin;
2316   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2317   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2318   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2319   idx  = a->j;
2320   v    = a->a;
2321   ii   = a->i;
2322 
2323   for (i=0; i<m; i++) {
2324     jrow = ii[i];
2325     n    = ii[i+1] - jrow;
2326     sum1  = 0.0;
2327     sum2  = 0.0;
2328     sum3  = 0.0;
2329     sum4  = 0.0;
2330     sum5  = 0.0;
2331     sum6  = 0.0;
2332     sum7  = 0.0;
2333     sum8  = 0.0;
2334     sum9  = 0.0;
2335     sum10 = 0.0;
2336     sum11 = 0.0;
2337     sum12 = 0.0;
2338     sum13 = 0.0;
2339     sum14 = 0.0;
2340     sum15 = 0.0;
2341     sum16 = 0.0;
2342     for (j=0; j<n; j++) {
2343       sum1  += v[jrow]*x[16*idx[jrow]];
2344       sum2  += v[jrow]*x[16*idx[jrow]+1];
2345       sum3  += v[jrow]*x[16*idx[jrow]+2];
2346       sum4  += v[jrow]*x[16*idx[jrow]+3];
2347       sum5  += v[jrow]*x[16*idx[jrow]+4];
2348       sum6  += v[jrow]*x[16*idx[jrow]+5];
2349       sum7  += v[jrow]*x[16*idx[jrow]+6];
2350       sum8  += v[jrow]*x[16*idx[jrow]+7];
2351       sum9  += v[jrow]*x[16*idx[jrow]+8];
2352       sum10 += v[jrow]*x[16*idx[jrow]+9];
2353       sum11 += v[jrow]*x[16*idx[jrow]+10];
2354       sum12 += v[jrow]*x[16*idx[jrow]+11];
2355       sum13 += v[jrow]*x[16*idx[jrow]+12];
2356       sum14 += v[jrow]*x[16*idx[jrow]+13];
2357       sum15 += v[jrow]*x[16*idx[jrow]+14];
2358       sum16 += v[jrow]*x[16*idx[jrow]+15];
2359       jrow++;
2360      }
2361     y[16*i]    += sum1;
2362     y[16*i+1]  += sum2;
2363     y[16*i+2]  += sum3;
2364     y[16*i+3]  += sum4;
2365     y[16*i+4]  += sum5;
2366     y[16*i+5]  += sum6;
2367     y[16*i+6]  += sum7;
2368     y[16*i+7]  += sum8;
2369     y[16*i+8]  += sum9;
2370     y[16*i+9]  += sum10;
2371     y[16*i+10] += sum11;
2372     y[16*i+11] += sum12;
2373     y[16*i+12] += sum13;
2374     y[16*i+13] += sum14;
2375     y[16*i+14] += sum15;
2376     y[16*i+15] += sum16;
2377   }
2378 
2379   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2380   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2381   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2387 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2388 {
2389   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2390   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2391   const PetscScalar *x,*v;
2392   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2393   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2394   PetscErrorCode    ierr;
2395   const PetscInt    m = b->AIJ->rmap->n,*idx;
2396   PetscInt          n,i;
2397 
2398   PetscFunctionBegin;
2399   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2400   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2401   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2402   for (i=0; i<m; i++) {
2403     idx    = a->j + a->i[i] ;
2404     v      = a->a + a->i[i] ;
2405     n      = a->i[i+1] - a->i[i];
2406     alpha1 = x[16*i];
2407     alpha2 = x[16*i+1];
2408     alpha3 = x[16*i+2];
2409     alpha4 = x[16*i+3];
2410     alpha5 = x[16*i+4];
2411     alpha6 = x[16*i+5];
2412     alpha7 = x[16*i+6];
2413     alpha8 = x[16*i+7];
2414     alpha9  = x[16*i+8];
2415     alpha10 = x[16*i+9];
2416     alpha11 = x[16*i+10];
2417     alpha12 = x[16*i+11];
2418     alpha13 = x[16*i+12];
2419     alpha14 = x[16*i+13];
2420     alpha15 = x[16*i+14];
2421     alpha16 = x[16*i+15];
2422     while (n-->0) {
2423       y[16*(*idx)]   += alpha1*(*v);
2424       y[16*(*idx)+1] += alpha2*(*v);
2425       y[16*(*idx)+2] += alpha3*(*v);
2426       y[16*(*idx)+3] += alpha4*(*v);
2427       y[16*(*idx)+4] += alpha5*(*v);
2428       y[16*(*idx)+5] += alpha6*(*v);
2429       y[16*(*idx)+6] += alpha7*(*v);
2430       y[16*(*idx)+7] += alpha8*(*v);
2431       y[16*(*idx)+8]  += alpha9*(*v);
2432       y[16*(*idx)+9]  += alpha10*(*v);
2433       y[16*(*idx)+10] += alpha11*(*v);
2434       y[16*(*idx)+11] += alpha12*(*v);
2435       y[16*(*idx)+12] += alpha13*(*v);
2436       y[16*(*idx)+13] += alpha14*(*v);
2437       y[16*(*idx)+14] += alpha15*(*v);
2438       y[16*(*idx)+15] += alpha16*(*v);
2439       idx++; v++;
2440     }
2441   }
2442   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2443   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2444   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2445   PetscFunctionReturn(0);
2446 }
2447 
2448 /*--------------------------------------------------------------------------------------------*/
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2451 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2452 {
2453   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2454   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2455   const PetscScalar *x,*v;
2456   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2457   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2458   PetscErrorCode    ierr;
2459   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2460   PetscInt          nonzerorow=0,n,i,jrow,j;
2461 
2462   PetscFunctionBegin;
2463   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2464   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2465   idx  = a->j;
2466   v    = a->a;
2467   ii   = a->i;
2468 
2469   for (i=0; i<m; i++) {
2470     jrow = ii[i];
2471     n    = ii[i+1] - jrow;
2472     sum1  = 0.0;
2473     sum2  = 0.0;
2474     sum3  = 0.0;
2475     sum4  = 0.0;
2476     sum5  = 0.0;
2477     sum6  = 0.0;
2478     sum7  = 0.0;
2479     sum8  = 0.0;
2480     sum9  = 0.0;
2481     sum10 = 0.0;
2482     sum11 = 0.0;
2483     sum12 = 0.0;
2484     sum13 = 0.0;
2485     sum14 = 0.0;
2486     sum15 = 0.0;
2487     sum16 = 0.0;
2488     sum17 = 0.0;
2489     sum18 = 0.0;
2490     nonzerorow += (n>0);
2491     for (j=0; j<n; j++) {
2492       sum1  += v[jrow]*x[18*idx[jrow]];
2493       sum2  += v[jrow]*x[18*idx[jrow]+1];
2494       sum3  += v[jrow]*x[18*idx[jrow]+2];
2495       sum4  += v[jrow]*x[18*idx[jrow]+3];
2496       sum5  += v[jrow]*x[18*idx[jrow]+4];
2497       sum6  += v[jrow]*x[18*idx[jrow]+5];
2498       sum7  += v[jrow]*x[18*idx[jrow]+6];
2499       sum8  += v[jrow]*x[18*idx[jrow]+7];
2500       sum9  += v[jrow]*x[18*idx[jrow]+8];
2501       sum10 += v[jrow]*x[18*idx[jrow]+9];
2502       sum11 += v[jrow]*x[18*idx[jrow]+10];
2503       sum12 += v[jrow]*x[18*idx[jrow]+11];
2504       sum13 += v[jrow]*x[18*idx[jrow]+12];
2505       sum14 += v[jrow]*x[18*idx[jrow]+13];
2506       sum15 += v[jrow]*x[18*idx[jrow]+14];
2507       sum16 += v[jrow]*x[18*idx[jrow]+15];
2508       sum17 += v[jrow]*x[18*idx[jrow]+16];
2509       sum18 += v[jrow]*x[18*idx[jrow]+17];
2510       jrow++;
2511      }
2512     y[18*i]    = sum1;
2513     y[18*i+1]  = sum2;
2514     y[18*i+2]  = sum3;
2515     y[18*i+3]  = sum4;
2516     y[18*i+4]  = sum5;
2517     y[18*i+5]  = sum6;
2518     y[18*i+6]  = sum7;
2519     y[18*i+7]  = sum8;
2520     y[18*i+8]  = sum9;
2521     y[18*i+9]  = sum10;
2522     y[18*i+10] = sum11;
2523     y[18*i+11] = sum12;
2524     y[18*i+12] = sum13;
2525     y[18*i+13] = sum14;
2526     y[18*i+14] = sum15;
2527     y[18*i+15] = sum16;
2528     y[18*i+16] = sum17;
2529     y[18*i+17] = sum18;
2530   }
2531 
2532   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2533   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2534   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 #undef __FUNCT__
2539 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2540 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2541 {
2542   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2543   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2544   const PetscScalar *x,*v;
2545   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2546   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2547   PetscErrorCode    ierr;
2548   const PetscInt    m = b->AIJ->rmap->n,*idx;
2549   PetscInt          n,i;
2550 
2551   PetscFunctionBegin;
2552   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2553   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2554   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2555 
2556   for (i=0; i<m; i++) {
2557     idx    = a->j + a->i[i] ;
2558     v      = a->a + a->i[i] ;
2559     n      = a->i[i+1] - a->i[i];
2560     alpha1  = x[18*i];
2561     alpha2  = x[18*i+1];
2562     alpha3  = x[18*i+2];
2563     alpha4  = x[18*i+3];
2564     alpha5  = x[18*i+4];
2565     alpha6  = x[18*i+5];
2566     alpha7  = x[18*i+6];
2567     alpha8  = x[18*i+7];
2568     alpha9  = x[18*i+8];
2569     alpha10 = x[18*i+9];
2570     alpha11 = x[18*i+10];
2571     alpha12 = x[18*i+11];
2572     alpha13 = x[18*i+12];
2573     alpha14 = x[18*i+13];
2574     alpha15 = x[18*i+14];
2575     alpha16 = x[18*i+15];
2576     alpha17 = x[18*i+16];
2577     alpha18 = x[18*i+17];
2578     while (n-->0) {
2579       y[18*(*idx)]    += alpha1*(*v);
2580       y[18*(*idx)+1]  += alpha2*(*v);
2581       y[18*(*idx)+2]  += alpha3*(*v);
2582       y[18*(*idx)+3]  += alpha4*(*v);
2583       y[18*(*idx)+4]  += alpha5*(*v);
2584       y[18*(*idx)+5]  += alpha6*(*v);
2585       y[18*(*idx)+6]  += alpha7*(*v);
2586       y[18*(*idx)+7]  += alpha8*(*v);
2587       y[18*(*idx)+8]  += alpha9*(*v);
2588       y[18*(*idx)+9]  += alpha10*(*v);
2589       y[18*(*idx)+10] += alpha11*(*v);
2590       y[18*(*idx)+11] += alpha12*(*v);
2591       y[18*(*idx)+12] += alpha13*(*v);
2592       y[18*(*idx)+13] += alpha14*(*v);
2593       y[18*(*idx)+14] += alpha15*(*v);
2594       y[18*(*idx)+15] += alpha16*(*v);
2595       y[18*(*idx)+16] += alpha17*(*v);
2596       y[18*(*idx)+17] += alpha18*(*v);
2597       idx++; v++;
2598     }
2599   }
2600   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2601   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2602   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 #undef __FUNCT__
2607 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2608 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2609 {
2610   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2611   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2612   const PetscScalar *x,*v;
2613   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2614   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2615   PetscErrorCode    ierr;
2616   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2617   PetscInt          n,i,jrow,j;
2618 
2619   PetscFunctionBegin;
2620   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2621   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2622   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2623   idx  = a->j;
2624   v    = a->a;
2625   ii   = a->i;
2626 
2627   for (i=0; i<m; i++) {
2628     jrow = ii[i];
2629     n    = ii[i+1] - jrow;
2630     sum1  = 0.0;
2631     sum2  = 0.0;
2632     sum3  = 0.0;
2633     sum4  = 0.0;
2634     sum5  = 0.0;
2635     sum6  = 0.0;
2636     sum7  = 0.0;
2637     sum8  = 0.0;
2638     sum9  = 0.0;
2639     sum10 = 0.0;
2640     sum11 = 0.0;
2641     sum12 = 0.0;
2642     sum13 = 0.0;
2643     sum14 = 0.0;
2644     sum15 = 0.0;
2645     sum16 = 0.0;
2646     sum17 = 0.0;
2647     sum18 = 0.0;
2648     for (j=0; j<n; j++) {
2649       sum1  += v[jrow]*x[18*idx[jrow]];
2650       sum2  += v[jrow]*x[18*idx[jrow]+1];
2651       sum3  += v[jrow]*x[18*idx[jrow]+2];
2652       sum4  += v[jrow]*x[18*idx[jrow]+3];
2653       sum5  += v[jrow]*x[18*idx[jrow]+4];
2654       sum6  += v[jrow]*x[18*idx[jrow]+5];
2655       sum7  += v[jrow]*x[18*idx[jrow]+6];
2656       sum8  += v[jrow]*x[18*idx[jrow]+7];
2657       sum9  += v[jrow]*x[18*idx[jrow]+8];
2658       sum10 += v[jrow]*x[18*idx[jrow]+9];
2659       sum11 += v[jrow]*x[18*idx[jrow]+10];
2660       sum12 += v[jrow]*x[18*idx[jrow]+11];
2661       sum13 += v[jrow]*x[18*idx[jrow]+12];
2662       sum14 += v[jrow]*x[18*idx[jrow]+13];
2663       sum15 += v[jrow]*x[18*idx[jrow]+14];
2664       sum16 += v[jrow]*x[18*idx[jrow]+15];
2665       sum17 += v[jrow]*x[18*idx[jrow]+16];
2666       sum18 += v[jrow]*x[18*idx[jrow]+17];
2667       jrow++;
2668      }
2669     y[18*i]    += sum1;
2670     y[18*i+1]  += sum2;
2671     y[18*i+2]  += sum3;
2672     y[18*i+3]  += sum4;
2673     y[18*i+4]  += sum5;
2674     y[18*i+5]  += sum6;
2675     y[18*i+6]  += sum7;
2676     y[18*i+7]  += sum8;
2677     y[18*i+8]  += sum9;
2678     y[18*i+9]  += sum10;
2679     y[18*i+10] += sum11;
2680     y[18*i+11] += sum12;
2681     y[18*i+12] += sum13;
2682     y[18*i+13] += sum14;
2683     y[18*i+14] += sum15;
2684     y[18*i+15] += sum16;
2685     y[18*i+16] += sum17;
2686     y[18*i+17] += sum18;
2687   }
2688 
2689   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2690   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2691   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2692   PetscFunctionReturn(0);
2693 }
2694 
2695 #undef __FUNCT__
2696 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2697 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2698 {
2699   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2700   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2701   const PetscScalar *x,*v;
2702   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2703   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2704   PetscErrorCode    ierr;
2705   const PetscInt    m = b->AIJ->rmap->n,*idx;
2706   PetscInt          n,i;
2707 
2708   PetscFunctionBegin;
2709   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2710   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2711   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2712   for (i=0; i<m; i++) {
2713     idx    = a->j + a->i[i] ;
2714     v      = a->a + a->i[i] ;
2715     n      = a->i[i+1] - a->i[i];
2716     alpha1 = x[18*i];
2717     alpha2 = x[18*i+1];
2718     alpha3 = x[18*i+2];
2719     alpha4 = x[18*i+3];
2720     alpha5 = x[18*i+4];
2721     alpha6 = x[18*i+5];
2722     alpha7 = x[18*i+6];
2723     alpha8 = x[18*i+7];
2724     alpha9  = x[18*i+8];
2725     alpha10 = x[18*i+9];
2726     alpha11 = x[18*i+10];
2727     alpha12 = x[18*i+11];
2728     alpha13 = x[18*i+12];
2729     alpha14 = x[18*i+13];
2730     alpha15 = x[18*i+14];
2731     alpha16 = x[18*i+15];
2732     alpha17 = x[18*i+16];
2733     alpha18 = x[18*i+17];
2734     while (n-->0) {
2735       y[18*(*idx)]   += alpha1*(*v);
2736       y[18*(*idx)+1] += alpha2*(*v);
2737       y[18*(*idx)+2] += alpha3*(*v);
2738       y[18*(*idx)+3] += alpha4*(*v);
2739       y[18*(*idx)+4] += alpha5*(*v);
2740       y[18*(*idx)+5] += alpha6*(*v);
2741       y[18*(*idx)+6] += alpha7*(*v);
2742       y[18*(*idx)+7] += alpha8*(*v);
2743       y[18*(*idx)+8]  += alpha9*(*v);
2744       y[18*(*idx)+9]  += alpha10*(*v);
2745       y[18*(*idx)+10] += alpha11*(*v);
2746       y[18*(*idx)+11] += alpha12*(*v);
2747       y[18*(*idx)+12] += alpha13*(*v);
2748       y[18*(*idx)+13] += alpha14*(*v);
2749       y[18*(*idx)+14] += alpha15*(*v);
2750       y[18*(*idx)+15] += alpha16*(*v);
2751       y[18*(*idx)+16] += alpha17*(*v);
2752       y[18*(*idx)+17] += alpha18*(*v);
2753       idx++; v++;
2754     }
2755   }
2756   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2757   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2758   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2759   PetscFunctionReturn(0);
2760 }
2761 
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatMult_SeqMAIJ_N"
2764 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2765 {
2766   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2767   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2768   const PetscScalar *x,*v;
2769   PetscScalar       *y,*sums;
2770   PetscErrorCode    ierr;
2771   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2772   PetscInt          n,i,jrow,j,dof = b->dof,k;
2773 
2774   PetscFunctionBegin;
2775   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2776   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2777   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2778   idx  = a->j;
2779   v    = a->a;
2780   ii   = a->i;
2781 
2782   for (i=0; i<m; i++) {
2783     jrow = ii[i];
2784     n    = ii[i+1] - jrow;
2785     sums = y + dof*i;
2786     for (j=0; j<n; j++) {
2787       for (k=0; k<dof; k++) {
2788         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2789       }
2790       jrow++;
2791     }
2792   }
2793 
2794   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2795   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2796   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
2802 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2803 {
2804   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2805   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2806   const PetscScalar *x,*v;
2807   PetscScalar       *y,*sums;
2808   PetscErrorCode    ierr;
2809   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2810   PetscInt          n,i,jrow,j,dof = b->dof,k;
2811 
2812   PetscFunctionBegin;
2813   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2814   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2815   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2816   idx  = a->j;
2817   v    = a->a;
2818   ii   = a->i;
2819 
2820   for (i=0; i<m; i++) {
2821     jrow = ii[i];
2822     n    = ii[i+1] - jrow;
2823     sums = y + dof*i;
2824     for (j=0; j<n; j++) {
2825       for (k=0; k<dof; k++) {
2826         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2827       }
2828       jrow++;
2829     }
2830   }
2831 
2832   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2833   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2834   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2835   PetscFunctionReturn(0);
2836 }
2837 
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
2840 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2841 {
2842   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2843   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2844   const PetscScalar *x,*v,*alpha;
2845   PetscScalar       *y;
2846   PetscErrorCode    ierr;
2847   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2848   PetscInt          n,i,k;
2849 
2850   PetscFunctionBegin;
2851   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2852   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2853   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2854   for (i=0; i<m; i++) {
2855     idx    = a->j + a->i[i] ;
2856     v      = a->a + a->i[i] ;
2857     n      = a->i[i+1] - a->i[i];
2858     alpha  = x + dof*i;
2859     while (n-->0) {
2860       for (k=0; k<dof; k++) {
2861         y[dof*(*idx)+k] += alpha[k]*(*v);
2862       }
2863       idx++; v++;
2864     }
2865   }
2866   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2867   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2868   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
2874 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2875 {
2876   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2877   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2878   const PetscScalar *x,*v,*alpha;
2879   PetscScalar       *y;
2880   PetscErrorCode    ierr;
2881   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2882   PetscInt          n,i,k;
2883 
2884   PetscFunctionBegin;
2885   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2886   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2887   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2888   for (i=0; i<m; i++) {
2889     idx    = a->j + a->i[i] ;
2890     v      = a->a + a->i[i] ;
2891     n      = a->i[i+1] - a->i[i];
2892     alpha  = x + dof*i;
2893     while (n-->0) {
2894       for (k=0; k<dof; k++) {
2895         y[dof*(*idx)+k] += alpha[k]*(*v);
2896       }
2897       idx++; v++;
2898     }
2899   }
2900   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2901   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2902   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2903   PetscFunctionReturn(0);
2904 }
2905 
2906 /*===================================================================================*/
2907 #undef __FUNCT__
2908 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2909 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2910 {
2911   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2912   PetscErrorCode ierr;
2913 
2914   PetscFunctionBegin;
2915   /* start the scatter */
2916   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2917   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2918   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2919   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2920   PetscFunctionReturn(0);
2921 }
2922 
2923 #undef __FUNCT__
2924 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2925 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2926 {
2927   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2928   PetscErrorCode ierr;
2929 
2930   PetscFunctionBegin;
2931   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2932   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2933   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2934   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2940 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2941 {
2942   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2943   PetscErrorCode ierr;
2944 
2945   PetscFunctionBegin;
2946   /* start the scatter */
2947   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2948   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2949   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2950   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2951   PetscFunctionReturn(0);
2952 }
2953 
2954 #undef __FUNCT__
2955 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2956 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2957 {
2958   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2959   PetscErrorCode ierr;
2960 
2961   PetscFunctionBegin;
2962   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2963   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2964   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2965   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2966   PetscFunctionReturn(0);
2967 }
2968 
2969 /* ----------------------------------------------------------------*/
2970 #undef __FUNCT__
2971 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2972 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2973 {
2974   /* This routine requires testing -- but it's getting better. */
2975   PetscErrorCode     ierr;
2976   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2977   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2978   Mat                P=pp->AIJ;
2979   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2980   PetscInt           *pti,*ptj,*ptJ;
2981   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2982   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2983   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2984   MatScalar          *ca;
2985   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2986 
2987   PetscFunctionBegin;
2988   /* Start timer */
2989   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2990 
2991   /* Get ij structure of P^T */
2992   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2993 
2994   cn = pn*ppdof;
2995   /* Allocate ci array, arrays for fill computation and */
2996   /* free space for accumulating nonzero column info */
2997   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2998   ci[0] = 0;
2999 
3000   /* Work arrays for rows of P^T*A */
3001   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
3002   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3003   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
3004 
3005   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3006   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3007   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3008   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3009   current_space = free_space;
3010 
3011   /* Determine symbolic info for each row of C: */
3012   for (i=0;i<pn;i++) {
3013     ptnzi  = pti[i+1] - pti[i];
3014     ptJ    = ptj + pti[i];
3015     for (dof=0;dof<ppdof;dof++) {
3016       ptanzi = 0;
3017       /* Determine symbolic row of PtA: */
3018       for (j=0;j<ptnzi;j++) {
3019         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3020         arow = ptJ[j]*ppdof + dof;
3021         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3022         anzj = ai[arow+1] - ai[arow];
3023         ajj  = aj + ai[arow];
3024         for (k=0;k<anzj;k++) {
3025           if (!ptadenserow[ajj[k]]) {
3026             ptadenserow[ajj[k]]    = -1;
3027             ptasparserow[ptanzi++] = ajj[k];
3028           }
3029         }
3030       }
3031       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3032       ptaj = ptasparserow;
3033       cnzi   = 0;
3034       for (j=0;j<ptanzi;j++) {
3035         /* Get offset within block of P */
3036         pshift = *ptaj%ppdof;
3037         /* Get block row of P */
3038         prow = (*ptaj++)/ppdof; /* integer division */
3039         /* P has same number of nonzeros per row as the compressed form */
3040         pnzj = pi[prow+1] - pi[prow];
3041         pjj  = pj + pi[prow];
3042         for (k=0;k<pnzj;k++) {
3043           /* Locations in C are shifted by the offset within the block */
3044           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3045           if (!denserow[pjj[k]*ppdof+pshift]) {
3046             denserow[pjj[k]*ppdof+pshift] = -1;
3047             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3048           }
3049         }
3050       }
3051 
3052       /* sort sparserow */
3053       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3054 
3055       /* If free space is not available, make more free space */
3056       /* Double the amount of total space in the list */
3057       if (current_space->local_remaining<cnzi) {
3058         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
3059       }
3060 
3061       /* Copy data into free space, and zero out denserows */
3062       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
3063       current_space->array           += cnzi;
3064       current_space->local_used      += cnzi;
3065       current_space->local_remaining -= cnzi;
3066 
3067       for (j=0;j<ptanzi;j++) {
3068         ptadenserow[ptasparserow[j]] = 0;
3069       }
3070       for (j=0;j<cnzi;j++) {
3071         denserow[sparserow[j]] = 0;
3072       }
3073       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3074       /*        For now, we will recompute what is needed. */
3075       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3076     }
3077   }
3078   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3079   /* Allocate space for cj, initialize cj, and */
3080   /* destroy list of free space and other temporary array(s) */
3081   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3082   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3083   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3084 
3085   /* Allocate space for ca */
3086   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
3087   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3088 
3089   /* put together the new matrix */
3090   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3091 
3092   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3093   /* Since these are PETSc arrays, change flags to free them as necessary. */
3094   c          = (Mat_SeqAIJ *)((*C)->data);
3095   c->free_a  = PETSC_TRUE;
3096   c->free_ij = PETSC_TRUE;
3097   c->nonew   = 0;
3098 
3099   /* Clean up. */
3100   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3101 
3102   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3103   PetscFunctionReturn(0);
3104 }
3105 
3106 #undef __FUNCT__
3107 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
3108 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3109 {
3110   /* This routine requires testing -- first draft only */
3111   PetscErrorCode  ierr;
3112   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3113   Mat             P=pp->AIJ;
3114   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
3115   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
3116   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3117   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3118   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3119   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3120   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3121   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3122   MatScalar       *ca=c->a,*caj,*apa;
3123 
3124   PetscFunctionBegin;
3125   /* Allocate temporary array for storage of one row of A*P */
3126   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
3127   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
3128   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
3129   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
3130 
3131   /* Clear old values in C */
3132   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3133 
3134   for (i=0;i<am;i++) {
3135     /* Form sparse row of A*P */
3136     anzi  = ai[i+1] - ai[i];
3137     apnzj = 0;
3138     for (j=0;j<anzi;j++) {
3139       /* Get offset within block of P */
3140       pshift = *aj%ppdof;
3141       /* Get block row of P */
3142       prow   = *aj++/ppdof; /* integer division */
3143       pnzj = pi[prow+1] - pi[prow];
3144       pjj  = pj + pi[prow];
3145       paj  = pa + pi[prow];
3146       for (k=0;k<pnzj;k++) {
3147         poffset = pjj[k]*ppdof+pshift;
3148         if (!apjdense[poffset]) {
3149           apjdense[poffset] = -1;
3150           apj[apnzj++]      = poffset;
3151         }
3152         apa[poffset] += (*aa)*paj[k];
3153       }
3154       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3155       aa++;
3156     }
3157 
3158     /* Sort the j index array for quick sparse axpy. */
3159     /* Note: a array does not need sorting as it is in dense storage locations. */
3160     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3161 
3162     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3163     prow    = i/ppdof; /* integer division */
3164     pshift  = i%ppdof;
3165     poffset = pi[prow];
3166     pnzi = pi[prow+1] - poffset;
3167     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3168     pJ   = pj+poffset;
3169     pA   = pa+poffset;
3170     for (j=0;j<pnzi;j++) {
3171       crow   = (*pJ)*ppdof+pshift;
3172       cjj    = cj + ci[crow];
3173       caj    = ca + ci[crow];
3174       pJ++;
3175       /* Perform sparse axpy operation.  Note cjj includes apj. */
3176       for (k=0,nextap=0;nextap<apnzj;k++) {
3177         if (cjj[k]==apj[nextap]) {
3178           caj[k] += (*pA)*apa[apj[nextap++]];
3179         }
3180       }
3181       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3182       pA++;
3183     }
3184 
3185     /* Zero the current row info for A*P */
3186     for (j=0;j<apnzj;j++) {
3187       apa[apj[j]]      = 0.;
3188       apjdense[apj[j]] = 0;
3189     }
3190   }
3191 
3192   /* Assemble the final matrix and clean up */
3193   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3194   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3195   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3196   PetscFunctionReturn(0);
3197 }
3198 
3199 #undef __FUNCT__
3200 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3201 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3202 {
3203   PetscErrorCode    ierr;
3204 
3205   PetscFunctionBegin;
3206   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3207   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
3208   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3214 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3215 {
3216   PetscFunctionBegin;
3217   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 EXTERN_C_BEGIN
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3224 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3225 {
3226   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3227   Mat               a = b->AIJ,B;
3228   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
3229   PetscErrorCode    ierr;
3230   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3231   PetscInt          *cols;
3232   PetscScalar       *vals;
3233 
3234   PetscFunctionBegin;
3235   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3236   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
3237   for (i=0; i<m; i++) {
3238     nmax = PetscMax(nmax,aij->ilen[i]);
3239     for (j=0; j<dof; j++) {
3240       ilen[dof*i+j] = aij->ilen[i];
3241     }
3242   }
3243   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3244   ierr = PetscFree(ilen);CHKERRQ(ierr);
3245   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
3246   ii   = 0;
3247   for (i=0; i<m; i++) {
3248     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3249     for (j=0; j<dof; j++) {
3250       for (k=0; k<ncols; k++) {
3251         icols[k] = dof*cols[k]+j;
3252       }
3253       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3254       ii++;
3255     }
3256     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3257   }
3258   ierr = PetscFree(icols);CHKERRQ(ierr);
3259   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3260   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3261 
3262   if (reuse == MAT_REUSE_MATRIX) {
3263     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3264   } else {
3265     *newmat = B;
3266   }
3267   PetscFunctionReturn(0);
3268 }
3269 EXTERN_C_END
3270 
3271 #include "../src/mat/impls/aij/mpi/mpiaij.h"
3272 
3273 EXTERN_C_BEGIN
3274 #undef __FUNCT__
3275 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3276 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3277 {
3278   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3279   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3280   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3281   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3282   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3283   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3284   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3285   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3286   PetscInt          rstart,cstart,*garray,ii,k;
3287   PetscErrorCode    ierr;
3288   PetscScalar       *vals,*ovals;
3289 
3290   PetscFunctionBegin;
3291   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3292   for (i=0; i<A->rmap->n/dof; i++) {
3293     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3294     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3295     for (j=0; j<dof; j++) {
3296       dnz[dof*i+j] = AIJ->ilen[i];
3297       onz[dof*i+j] = OAIJ->ilen[i];
3298     }
3299   }
3300   ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3301   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3302 
3303   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3304   rstart = dof*maij->A->rmap->rstart;
3305   cstart = dof*maij->A->cmap->rstart;
3306   garray = mpiaij->garray;
3307 
3308   ii = rstart;
3309   for (i=0; i<A->rmap->n/dof; i++) {
3310     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3311     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3312     for (j=0; j<dof; j++) {
3313       for (k=0; k<ncols; k++) {
3314         icols[k] = cstart + dof*cols[k]+j;
3315       }
3316       for (k=0; k<oncols; k++) {
3317         oicols[k] = dof*garray[ocols[k]]+j;
3318       }
3319       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3320       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3321       ii++;
3322     }
3323     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3324     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3325   }
3326   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3327 
3328   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3330 
3331   if (reuse == MAT_REUSE_MATRIX) {
3332     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3333     ((PetscObject)A)->refct = 1;
3334     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3335     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3336   } else {
3337     *newmat = B;
3338   }
3339   PetscFunctionReturn(0);
3340 }
3341 EXTERN_C_END
3342 
3343 
3344 /* ---------------------------------------------------------------------------------- */
3345 #undef __FUNCT__
3346 #define __FUNCT__ "MatCreateMAIJ"
3347 /*@C
3348   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3349   operations for multicomponent problems.  It interpolates each component the same
3350   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3351   and MATMPIAIJ for distributed matrices.
3352 
3353   Collective
3354 
3355   Input Parameters:
3356 + A - the AIJ matrix describing the action on blocks
3357 - dof - the block size (number of components per node)
3358 
3359   Output Parameter:
3360 . maij - the new MAIJ matrix
3361 
3362   Operations provided:
3363 + MatMult
3364 . MatMultTranspose
3365 . MatMultAdd
3366 . MatMultTransposeAdd
3367 - MatView
3368 
3369   Level: advanced
3370 
3371 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension()
3372 @*/
3373 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3374 {
3375   PetscErrorCode ierr;
3376   PetscMPIInt    size;
3377   PetscInt       n;
3378   Mat            B;
3379 
3380   PetscFunctionBegin;
3381   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3382 
3383   if (dof == 1) {
3384     *maij = A;
3385   } else {
3386     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3387     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3388     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3389     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3390     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3391     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3392     B->assembled    = PETSC_TRUE;
3393 
3394     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3395     if (size == 1) {
3396       Mat_SeqMAIJ    *b;
3397 
3398       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3399       B->ops->destroy = MatDestroy_SeqMAIJ;
3400       B->ops->view    = MatView_SeqMAIJ;
3401       b      = (Mat_SeqMAIJ*)B->data;
3402       b->dof = dof;
3403       b->AIJ = A;
3404       if (dof == 2) {
3405         B->ops->mult             = MatMult_SeqMAIJ_2;
3406         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3407         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3408         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3409       } else if (dof == 3) {
3410         B->ops->mult             = MatMult_SeqMAIJ_3;
3411         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3412         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3413         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3414       } else if (dof == 4) {
3415         B->ops->mult             = MatMult_SeqMAIJ_4;
3416         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3417         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3418         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3419       } else if (dof == 5) {
3420         B->ops->mult             = MatMult_SeqMAIJ_5;
3421         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3422         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3423         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3424       } else if (dof == 6) {
3425         B->ops->mult             = MatMult_SeqMAIJ_6;
3426         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3427         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3428         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3429       } else if (dof == 7) {
3430         B->ops->mult             = MatMult_SeqMAIJ_7;
3431         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3432         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3433         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3434       } else if (dof == 8) {
3435         B->ops->mult             = MatMult_SeqMAIJ_8;
3436         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3437         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3438         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3439       } else if (dof == 9) {
3440         B->ops->mult             = MatMult_SeqMAIJ_9;
3441         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3442         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3443         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3444       } else if (dof == 10) {
3445         B->ops->mult             = MatMult_SeqMAIJ_10;
3446         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3447         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3448         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3449       } else if (dof == 11) {
3450         B->ops->mult             = MatMult_SeqMAIJ_11;
3451         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3452         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3453         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3454       } else if (dof == 16) {
3455         B->ops->mult             = MatMult_SeqMAIJ_16;
3456         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3457         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3458         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3459       } else if (dof == 18) {
3460         B->ops->mult             = MatMult_SeqMAIJ_18;
3461         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3462         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3463         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3464       } else {
3465         B->ops->mult             = MatMult_SeqMAIJ_N;
3466         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3467         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3468         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3469       }
3470       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3471       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3472       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3473     } else {
3474       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3475       Mat_MPIMAIJ *b;
3476       IS          from,to;
3477       Vec         gvec;
3478       PetscInt    *garray,i;
3479 
3480       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3481       B->ops->destroy = MatDestroy_MPIMAIJ;
3482       B->ops->view    = MatView_MPIMAIJ;
3483       b      = (Mat_MPIMAIJ*)B->data;
3484       b->dof = dof;
3485       b->A   = A;
3486       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3487       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3488 
3489       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3490       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
3491       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3492 
3493       /* create two temporary Index sets for build scatter gather */
3494       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
3495       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
3496       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr);
3497       ierr = PetscFree(garray);CHKERRQ(ierr);
3498       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3499 
3500       /* create temporary global vector to generate scatter context */
3501       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3502       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
3503 
3504       /* generate the scatter context */
3505       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3506 
3507       ierr = ISDestroy(from);CHKERRQ(ierr);
3508       ierr = ISDestroy(to);CHKERRQ(ierr);
3509       ierr = VecDestroy(gvec);CHKERRQ(ierr);
3510 
3511       B->ops->mult             = MatMult_MPIMAIJ_dof;
3512       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3513       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3514       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3515       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3516       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3517       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3518     }
3519     *maij = B;
3520     ierr = MatView_Private(B);CHKERRQ(ierr);
3521   }
3522   PetscFunctionReturn(0);
3523 }
3524