xref: /petsc/src/mat/impls/maij/maij.c (revision 419beca1004e80ebaf01ed86ca4fa04d30fb35e8)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatMAIJGetAIJ"
24 /*@
25    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
26 
27    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
28 
29    Input Parameter:
30 .  A - the MAIJ matrix
31 
32    Output Parameter:
33 .  B - the AIJ matrix
34 
35    Level: advanced
36 
37    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
38 
39 .seealso: MatCreateMAIJ()
40 @*/
41 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
42 {
43   PetscErrorCode ierr;
44   PetscBool      ismpimaij,isseqmaij;
45 
46   PetscFunctionBegin;
47   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
48   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
49   if (ismpimaij) {
50     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
51 
52     *B = b->A;
53   } else if (isseqmaij) {
54     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
55 
56     *B = b->AIJ;
57   } else {
58     *B = A;
59   }
60   PetscFunctionReturn(0);
61 }
62 
63 #undef __FUNCT__
64 #define __FUNCT__ "MatMAIJRedimension"
65 /*@C
66    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
67 
68    Logically Collective
69 
70    Input Parameter:
71 +  A - the MAIJ matrix
72 -  dof - the block size for the new matrix
73 
74    Output Parameter:
75 .  B - the new MAIJ matrix
76 
77    Level: advanced
78 
79 .seealso: MatCreateMAIJ()
80 @*/
81 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
82 {
83   PetscErrorCode ierr;
84   Mat            Aij = NULL;
85 
86   PetscFunctionBegin;
87   PetscValidLogicalCollectiveInt(A,dof,2);
88   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
89   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
90   PetscFunctionReturn(0);
91 }
92 
93 #undef __FUNCT__
94 #define __FUNCT__ "MatDestroy_SeqMAIJ"
95 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
96 {
97   PetscErrorCode ierr;
98   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
99 
100   PetscFunctionBegin;
101   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
102   ierr = PetscFree(A->data);CHKERRQ(ierr);
103   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
104   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatSetUp_MAIJ"
110 PetscErrorCode MatSetUp_MAIJ(Mat A)
111 {
112   PetscFunctionBegin;
113   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
114   PetscFunctionReturn(0);
115 }
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatView_SeqMAIJ"
119 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
120 {
121   PetscErrorCode ierr;
122   Mat            B;
123 
124   PetscFunctionBegin;
125   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
126   ierr = MatView(B,viewer);CHKERRQ(ierr);
127   ierr = MatDestroy(&B);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "MatView_MPIMAIJ"
133 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
134 {
135   PetscErrorCode ierr;
136   Mat            B;
137 
138   PetscFunctionBegin;
139   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
140   ierr = MatView(B,viewer);CHKERRQ(ierr);
141   ierr = MatDestroy(&B);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatDestroy_MPIMAIJ"
147 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
148 {
149   PetscErrorCode ierr;
150   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
151 
152   PetscFunctionBegin;
153   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
154   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
155   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
156   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
157   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
158   ierr = PetscFree(A->data);CHKERRQ(ierr);
159   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
160   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
161   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 /*MC
166   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
167   multicomponent problems, interpolating or restricting each component the same way independently.
168   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
169 
170   Operations provided:
171 . MatMult
172 . MatMultTranspose
173 . MatMultAdd
174 . MatMultTransposeAdd
175 
176   Level: advanced
177 
178 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
179 M*/
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatCreate_MAIJ"
183 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
184 {
185   PetscErrorCode ierr;
186   Mat_MPIMAIJ    *b;
187   PetscMPIInt    size;
188 
189   PetscFunctionBegin;
190   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
191   A->data  = (void*)b;
192 
193   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
194 
195   A->ops->setup = MatSetUp_MAIJ;
196 
197   b->AIJ  = 0;
198   b->dof  = 0;
199   b->OAIJ = 0;
200   b->ctx  = 0;
201   b->w    = 0;
202   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
203   if (size == 1) {
204     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
205   } else {
206     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
207   }
208   A->preallocated  = PETSC_TRUE;
209   A->assembled     = PETSC_TRUE;
210   PetscFunctionReturn(0);
211 }
212 
213 /* --------------------------------------------------------------------------------------*/
214 #undef __FUNCT__
215 #define __FUNCT__ "MatMult_SeqMAIJ_2"
216 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
217 {
218   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
219   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
220   const PetscScalar *x,*v;
221   PetscScalar       *y, sum1, sum2;
222   PetscErrorCode    ierr;
223   PetscInt          nonzerorow=0,n,i,jrow,j;
224   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
225 
226   PetscFunctionBegin;
227   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
228   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
229   idx  = a->j;
230   v    = a->a;
231   ii   = a->i;
232 
233   for (i=0; i<m; i++) {
234     jrow  = ii[i];
235     n     = ii[i+1] - jrow;
236     sum1  = 0.0;
237     sum2  = 0.0;
238 
239     nonzerorow += (n>0);
240     for (j=0; j<n; j++) {
241       sum1 += v[jrow]*x[2*idx[jrow]];
242       sum2 += v[jrow]*x[2*idx[jrow]+1];
243       jrow++;
244     }
245     y[2*i]   = sum1;
246     y[2*i+1] = sum2;
247   }
248 
249   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
250   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
251   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
257 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
258 {
259   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
260   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
261   const PetscScalar *x,*v;
262   PetscScalar       *y,alpha1,alpha2;
263   PetscErrorCode    ierr;
264   PetscInt          n,i;
265   const PetscInt    m = b->AIJ->rmap->n,*idx;
266 
267   PetscFunctionBegin;
268   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
269   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
270   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
271 
272   for (i=0; i<m; i++) {
273     idx    = a->j + a->i[i];
274     v      = a->a + a->i[i];
275     n      = a->i[i+1] - a->i[i];
276     alpha1 = x[2*i];
277     alpha2 = x[2*i+1];
278     while (n-->0) {
279       y[2*(*idx)]   += alpha1*(*v);
280       y[2*(*idx)+1] += alpha2*(*v);
281       idx++; v++;
282     }
283   }
284   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
285   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
286   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
292 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
293 {
294   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
295   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
296   const PetscScalar *x,*v;
297   PetscScalar       *y,sum1, sum2;
298   PetscErrorCode    ierr;
299   PetscInt          n,i,jrow,j;
300   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
301 
302   PetscFunctionBegin;
303   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
304   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
305   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
306   idx  = a->j;
307   v    = a->a;
308   ii   = a->i;
309 
310   for (i=0; i<m; i++) {
311     jrow = ii[i];
312     n    = ii[i+1] - jrow;
313     sum1 = 0.0;
314     sum2 = 0.0;
315     for (j=0; j<n; j++) {
316       sum1 += v[jrow]*x[2*idx[jrow]];
317       sum2 += v[jrow]*x[2*idx[jrow]+1];
318       jrow++;
319     }
320     y[2*i]   += sum1;
321     y[2*i+1] += sum2;
322   }
323 
324   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
325   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
326   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 #undef __FUNCT__
330 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
331 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
332 {
333   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
334   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
335   const PetscScalar *x,*v;
336   PetscScalar       *y,alpha1,alpha2;
337   PetscErrorCode    ierr;
338   PetscInt          n,i;
339   const PetscInt    m = b->AIJ->rmap->n,*idx;
340 
341   PetscFunctionBegin;
342   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
343   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
345 
346   for (i=0; i<m; i++) {
347     idx    = a->j + a->i[i];
348     v      = a->a + a->i[i];
349     n      = a->i[i+1] - a->i[i];
350     alpha1 = x[2*i];
351     alpha2 = x[2*i+1];
352     while (n-->0) {
353       y[2*(*idx)]   += alpha1*(*v);
354       y[2*(*idx)+1] += alpha2*(*v);
355       idx++; v++;
356     }
357   }
358   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
359   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
360   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 /* --------------------------------------------------------------------------------------*/
364 #undef __FUNCT__
365 #define __FUNCT__ "MatMult_SeqMAIJ_3"
366 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
367 {
368   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
369   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
370   const PetscScalar *x,*v;
371   PetscScalar       *y,sum1, sum2, sum3;
372   PetscErrorCode    ierr;
373   PetscInt          nonzerorow=0,n,i,jrow,j;
374   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
375 
376   PetscFunctionBegin;
377   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
378   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
379   idx  = a->j;
380   v    = a->a;
381   ii   = a->i;
382 
383   for (i=0; i<m; i++) {
384     jrow  = ii[i];
385     n     = ii[i+1] - jrow;
386     sum1  = 0.0;
387     sum2  = 0.0;
388     sum3  = 0.0;
389 
390     nonzerorow += (n>0);
391     for (j=0; j<n; j++) {
392       sum1 += v[jrow]*x[3*idx[jrow]];
393       sum2 += v[jrow]*x[3*idx[jrow]+1];
394       sum3 += v[jrow]*x[3*idx[jrow]+2];
395       jrow++;
396      }
397     y[3*i]   = sum1;
398     y[3*i+1] = sum2;
399     y[3*i+2] = sum3;
400   }
401 
402   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
403   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
404   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
410 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
411 {
412   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
413   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
414   const PetscScalar *x,*v;
415   PetscScalar       *y,alpha1,alpha2,alpha3;
416   PetscErrorCode    ierr;
417   PetscInt          n,i;
418   const PetscInt    m = b->AIJ->rmap->n,*idx;
419 
420   PetscFunctionBegin;
421   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
422   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
423   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
424 
425   for (i=0; i<m; i++) {
426     idx    = a->j + a->i[i];
427     v      = a->a + a->i[i];
428     n      = a->i[i+1] - a->i[i];
429     alpha1 = x[3*i];
430     alpha2 = x[3*i+1];
431     alpha3 = x[3*i+2];
432     while (n-->0) {
433       y[3*(*idx)]   += alpha1*(*v);
434       y[3*(*idx)+1] += alpha2*(*v);
435       y[3*(*idx)+2] += alpha3*(*v);
436       idx++; v++;
437     }
438   }
439   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
440   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
441   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
442   PetscFunctionReturn(0);
443 }
444 
445 #undef __FUNCT__
446 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
447 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
448 {
449   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
450   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
451   const PetscScalar *x,*v;
452   PetscScalar       *y,sum1, sum2, sum3;
453   PetscErrorCode    ierr;
454   PetscInt          n,i,jrow,j;
455   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
456 
457   PetscFunctionBegin;
458   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
459   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
460   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
461   idx  = a->j;
462   v    = a->a;
463   ii   = a->i;
464 
465   for (i=0; i<m; i++) {
466     jrow = ii[i];
467     n    = ii[i+1] - jrow;
468     sum1 = 0.0;
469     sum2 = 0.0;
470     sum3 = 0.0;
471     for (j=0; j<n; j++) {
472       sum1 += v[jrow]*x[3*idx[jrow]];
473       sum2 += v[jrow]*x[3*idx[jrow]+1];
474       sum3 += v[jrow]*x[3*idx[jrow]+2];
475       jrow++;
476     }
477     y[3*i]   += sum1;
478     y[3*i+1] += sum2;
479     y[3*i+2] += sum3;
480   }
481 
482   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
483   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
484   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
489 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
492   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
493   const PetscScalar *x,*v;
494   PetscScalar       *y,alpha1,alpha2,alpha3;
495   PetscErrorCode    ierr;
496   PetscInt          n,i;
497   const PetscInt    m = b->AIJ->rmap->n,*idx;
498 
499   PetscFunctionBegin;
500   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
501   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
502   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
503   for (i=0; i<m; i++) {
504     idx    = a->j + a->i[i];
505     v      = a->a + a->i[i];
506     n      = a->i[i+1] - a->i[i];
507     alpha1 = x[3*i];
508     alpha2 = x[3*i+1];
509     alpha3 = x[3*i+2];
510     while (n-->0) {
511       y[3*(*idx)]   += alpha1*(*v);
512       y[3*(*idx)+1] += alpha2*(*v);
513       y[3*(*idx)+2] += alpha3*(*v);
514       idx++; v++;
515     }
516   }
517   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
518   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
519   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 /* ------------------------------------------------------------------------------*/
524 #undef __FUNCT__
525 #define __FUNCT__ "MatMult_SeqMAIJ_4"
526 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
527 {
528   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
529   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
530   const PetscScalar *x,*v;
531   PetscScalar       *y,sum1, sum2, sum3, sum4;
532   PetscErrorCode    ierr;
533   PetscInt          nonzerorow=0,n,i,jrow,j;
534   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
535 
536   PetscFunctionBegin;
537   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
538   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
539   idx  = a->j;
540   v    = a->a;
541   ii   = a->i;
542 
543   for (i=0; i<m; i++) {
544     jrow        = ii[i];
545     n           = ii[i+1] - jrow;
546     sum1        = 0.0;
547     sum2        = 0.0;
548     sum3        = 0.0;
549     sum4        = 0.0;
550     nonzerorow += (n>0);
551     for (j=0; j<n; j++) {
552       sum1 += v[jrow]*x[4*idx[jrow]];
553       sum2 += v[jrow]*x[4*idx[jrow]+1];
554       sum3 += v[jrow]*x[4*idx[jrow]+2];
555       sum4 += v[jrow]*x[4*idx[jrow]+3];
556       jrow++;
557     }
558     y[4*i]   = sum1;
559     y[4*i+1] = sum2;
560     y[4*i+2] = sum3;
561     y[4*i+3] = sum4;
562   }
563 
564   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
565   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
566   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
572 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
573 {
574   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
575   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
576   const PetscScalar *x,*v;
577   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
578   PetscErrorCode    ierr;
579   PetscInt          n,i;
580   const PetscInt    m = b->AIJ->rmap->n,*idx;
581 
582   PetscFunctionBegin;
583   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
584   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
585   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
586   for (i=0; i<m; i++) {
587     idx    = a->j + a->i[i];
588     v      = a->a + a->i[i];
589     n      = a->i[i+1] - a->i[i];
590     alpha1 = x[4*i];
591     alpha2 = x[4*i+1];
592     alpha3 = x[4*i+2];
593     alpha4 = x[4*i+3];
594     while (n-->0) {
595       y[4*(*idx)]   += alpha1*(*v);
596       y[4*(*idx)+1] += alpha2*(*v);
597       y[4*(*idx)+2] += alpha3*(*v);
598       y[4*(*idx)+3] += alpha4*(*v);
599       idx++; v++;
600     }
601   }
602   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
603   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
604   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
610 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
611 {
612   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
613   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
614   const PetscScalar *x,*v;
615   PetscScalar       *y,sum1, sum2, sum3, sum4;
616   PetscErrorCode    ierr;
617   PetscInt          n,i,jrow,j;
618   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
619 
620   PetscFunctionBegin;
621   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
622   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
623   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
624   idx  = a->j;
625   v    = a->a;
626   ii   = a->i;
627 
628   for (i=0; i<m; i++) {
629     jrow = ii[i];
630     n    = ii[i+1] - jrow;
631     sum1 = 0.0;
632     sum2 = 0.0;
633     sum3 = 0.0;
634     sum4 = 0.0;
635     for (j=0; j<n; j++) {
636       sum1 += v[jrow]*x[4*idx[jrow]];
637       sum2 += v[jrow]*x[4*idx[jrow]+1];
638       sum3 += v[jrow]*x[4*idx[jrow]+2];
639       sum4 += v[jrow]*x[4*idx[jrow]+3];
640       jrow++;
641     }
642     y[4*i]   += sum1;
643     y[4*i+1] += sum2;
644     y[4*i+2] += sum3;
645     y[4*i+3] += sum4;
646   }
647 
648   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
649   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
650   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 #undef __FUNCT__
654 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
655 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
656 {
657   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
658   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659   const PetscScalar *x,*v;
660   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
661   PetscErrorCode    ierr;
662   PetscInt          n,i;
663   const PetscInt    m = b->AIJ->rmap->n,*idx;
664 
665   PetscFunctionBegin;
666   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
667   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
668   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
669 
670   for (i=0; i<m; i++) {
671     idx    = a->j + a->i[i];
672     v      = a->a + a->i[i];
673     n      = a->i[i+1] - a->i[i];
674     alpha1 = x[4*i];
675     alpha2 = x[4*i+1];
676     alpha3 = x[4*i+2];
677     alpha4 = x[4*i+3];
678     while (n-->0) {
679       y[4*(*idx)]   += alpha1*(*v);
680       y[4*(*idx)+1] += alpha2*(*v);
681       y[4*(*idx)+2] += alpha3*(*v);
682       y[4*(*idx)+3] += alpha4*(*v);
683       idx++; v++;
684     }
685   }
686   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
687   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
688   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 /* ------------------------------------------------------------------------------*/
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatMult_SeqMAIJ_5"
695 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
696 {
697   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
698   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
699   const PetscScalar *x,*v;
700   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
701   PetscErrorCode    ierr;
702   PetscInt          nonzerorow=0,n,i,jrow,j;
703   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
704 
705   PetscFunctionBegin;
706   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
707   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
708   idx  = a->j;
709   v    = a->a;
710   ii   = a->i;
711 
712   for (i=0; i<m; i++) {
713     jrow  = ii[i];
714     n     = ii[i+1] - jrow;
715     sum1  = 0.0;
716     sum2  = 0.0;
717     sum3  = 0.0;
718     sum4  = 0.0;
719     sum5  = 0.0;
720 
721     nonzerorow += (n>0);
722     for (j=0; j<n; j++) {
723       sum1 += v[jrow]*x[5*idx[jrow]];
724       sum2 += v[jrow]*x[5*idx[jrow]+1];
725       sum3 += v[jrow]*x[5*idx[jrow]+2];
726       sum4 += v[jrow]*x[5*idx[jrow]+3];
727       sum5 += v[jrow]*x[5*idx[jrow]+4];
728       jrow++;
729     }
730     y[5*i]   = sum1;
731     y[5*i+1] = sum2;
732     y[5*i+2] = sum3;
733     y[5*i+3] = sum4;
734     y[5*i+4] = sum5;
735   }
736 
737   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
738   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
739   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
745 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
746 {
747   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
748   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
749   const PetscScalar *x,*v;
750   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
751   PetscErrorCode    ierr;
752   PetscInt          n,i;
753   const PetscInt    m = b->AIJ->rmap->n,*idx;
754 
755   PetscFunctionBegin;
756   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
757   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
758   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
759 
760   for (i=0; i<m; i++) {
761     idx    = a->j + a->i[i];
762     v      = a->a + a->i[i];
763     n      = a->i[i+1] - a->i[i];
764     alpha1 = x[5*i];
765     alpha2 = x[5*i+1];
766     alpha3 = x[5*i+2];
767     alpha4 = x[5*i+3];
768     alpha5 = x[5*i+4];
769     while (n-->0) {
770       y[5*(*idx)]   += alpha1*(*v);
771       y[5*(*idx)+1] += alpha2*(*v);
772       y[5*(*idx)+2] += alpha3*(*v);
773       y[5*(*idx)+3] += alpha4*(*v);
774       y[5*(*idx)+4] += alpha5*(*v);
775       idx++; v++;
776     }
777   }
778   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
779   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
780   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
786 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
787 {
788   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
789   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
790   const PetscScalar *x,*v;
791   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
792   PetscErrorCode    ierr;
793   PetscInt          n,i,jrow,j;
794   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
795 
796   PetscFunctionBegin;
797   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
798   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
799   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
800   idx  = a->j;
801   v    = a->a;
802   ii   = a->i;
803 
804   for (i=0; i<m; i++) {
805     jrow = ii[i];
806     n    = ii[i+1] - jrow;
807     sum1 = 0.0;
808     sum2 = 0.0;
809     sum3 = 0.0;
810     sum4 = 0.0;
811     sum5 = 0.0;
812     for (j=0; j<n; j++) {
813       sum1 += v[jrow]*x[5*idx[jrow]];
814       sum2 += v[jrow]*x[5*idx[jrow]+1];
815       sum3 += v[jrow]*x[5*idx[jrow]+2];
816       sum4 += v[jrow]*x[5*idx[jrow]+3];
817       sum5 += v[jrow]*x[5*idx[jrow]+4];
818       jrow++;
819     }
820     y[5*i]   += sum1;
821     y[5*i+1] += sum2;
822     y[5*i+2] += sum3;
823     y[5*i+3] += sum4;
824     y[5*i+4] += sum5;
825   }
826 
827   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
828   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
829   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
835 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
836 {
837   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
838   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
839   const PetscScalar *x,*v;
840   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
841   PetscErrorCode    ierr;
842   PetscInt          n,i;
843   const PetscInt    m = b->AIJ->rmap->n,*idx;
844 
845   PetscFunctionBegin;
846   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
847   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
848   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
849 
850   for (i=0; i<m; i++) {
851     idx    = a->j + a->i[i];
852     v      = a->a + a->i[i];
853     n      = a->i[i+1] - a->i[i];
854     alpha1 = x[5*i];
855     alpha2 = x[5*i+1];
856     alpha3 = x[5*i+2];
857     alpha4 = x[5*i+3];
858     alpha5 = x[5*i+4];
859     while (n-->0) {
860       y[5*(*idx)]   += alpha1*(*v);
861       y[5*(*idx)+1] += alpha2*(*v);
862       y[5*(*idx)+2] += alpha3*(*v);
863       y[5*(*idx)+3] += alpha4*(*v);
864       y[5*(*idx)+4] += alpha5*(*v);
865       idx++; v++;
866     }
867   }
868   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
869   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
870   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 /* ------------------------------------------------------------------------------*/
875 #undef __FUNCT__
876 #define __FUNCT__ "MatMult_SeqMAIJ_6"
877 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
878 {
879   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
880   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
881   const PetscScalar *x,*v;
882   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
883   PetscErrorCode    ierr;
884   PetscInt          nonzerorow=0,n,i,jrow,j;
885   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
886 
887   PetscFunctionBegin;
888   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
889   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
890   idx  = a->j;
891   v    = a->a;
892   ii   = a->i;
893 
894   for (i=0; i<m; i++) {
895     jrow  = ii[i];
896     n     = ii[i+1] - jrow;
897     sum1  = 0.0;
898     sum2  = 0.0;
899     sum3  = 0.0;
900     sum4  = 0.0;
901     sum5  = 0.0;
902     sum6  = 0.0;
903 
904     nonzerorow += (n>0);
905     for (j=0; j<n; j++) {
906       sum1 += v[jrow]*x[6*idx[jrow]];
907       sum2 += v[jrow]*x[6*idx[jrow]+1];
908       sum3 += v[jrow]*x[6*idx[jrow]+2];
909       sum4 += v[jrow]*x[6*idx[jrow]+3];
910       sum5 += v[jrow]*x[6*idx[jrow]+4];
911       sum6 += v[jrow]*x[6*idx[jrow]+5];
912       jrow++;
913     }
914     y[6*i]   = sum1;
915     y[6*i+1] = sum2;
916     y[6*i+2] = sum3;
917     y[6*i+3] = sum4;
918     y[6*i+4] = sum5;
919     y[6*i+5] = sum6;
920   }
921 
922   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
923   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
924   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
930 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
931 {
932   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
933   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
934   const PetscScalar *x,*v;
935   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
936   PetscErrorCode    ierr;
937   PetscInt          n,i;
938   const PetscInt    m = b->AIJ->rmap->n,*idx;
939 
940   PetscFunctionBegin;
941   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
942   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
943   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
944 
945   for (i=0; i<m; i++) {
946     idx    = a->j + a->i[i];
947     v      = a->a + a->i[i];
948     n      = a->i[i+1] - a->i[i];
949     alpha1 = x[6*i];
950     alpha2 = x[6*i+1];
951     alpha3 = x[6*i+2];
952     alpha4 = x[6*i+3];
953     alpha5 = x[6*i+4];
954     alpha6 = x[6*i+5];
955     while (n-->0) {
956       y[6*(*idx)]   += alpha1*(*v);
957       y[6*(*idx)+1] += alpha2*(*v);
958       y[6*(*idx)+2] += alpha3*(*v);
959       y[6*(*idx)+3] += alpha4*(*v);
960       y[6*(*idx)+4] += alpha5*(*v);
961       y[6*(*idx)+5] += alpha6*(*v);
962       idx++; v++;
963     }
964   }
965   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
966   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
967   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
973 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
974 {
975   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
976   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
977   const PetscScalar *x,*v;
978   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
979   PetscErrorCode    ierr;
980   PetscInt          n,i,jrow,j;
981   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
982 
983   PetscFunctionBegin;
984   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
985   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
986   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
987   idx  = a->j;
988   v    = a->a;
989   ii   = a->i;
990 
991   for (i=0; i<m; i++) {
992     jrow = ii[i];
993     n    = ii[i+1] - jrow;
994     sum1 = 0.0;
995     sum2 = 0.0;
996     sum3 = 0.0;
997     sum4 = 0.0;
998     sum5 = 0.0;
999     sum6 = 0.0;
1000     for (j=0; j<n; j++) {
1001       sum1 += v[jrow]*x[6*idx[jrow]];
1002       sum2 += v[jrow]*x[6*idx[jrow]+1];
1003       sum3 += v[jrow]*x[6*idx[jrow]+2];
1004       sum4 += v[jrow]*x[6*idx[jrow]+3];
1005       sum5 += v[jrow]*x[6*idx[jrow]+4];
1006       sum6 += v[jrow]*x[6*idx[jrow]+5];
1007       jrow++;
1008     }
1009     y[6*i]   += sum1;
1010     y[6*i+1] += sum2;
1011     y[6*i+2] += sum3;
1012     y[6*i+3] += sum4;
1013     y[6*i+4] += sum5;
1014     y[6*i+5] += sum6;
1015   }
1016 
1017   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1018   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1019   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1025 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1026 {
1027   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1028   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1029   const PetscScalar *x,*v;
1030   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1031   PetscErrorCode    ierr;
1032   PetscInt          n,i;
1033   const PetscInt    m = b->AIJ->rmap->n,*idx;
1034 
1035   PetscFunctionBegin;
1036   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1037   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1038   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1039 
1040   for (i=0; i<m; i++) {
1041     idx    = a->j + a->i[i];
1042     v      = a->a + a->i[i];
1043     n      = a->i[i+1] - a->i[i];
1044     alpha1 = x[6*i];
1045     alpha2 = x[6*i+1];
1046     alpha3 = x[6*i+2];
1047     alpha4 = x[6*i+3];
1048     alpha5 = x[6*i+4];
1049     alpha6 = x[6*i+5];
1050     while (n-->0) {
1051       y[6*(*idx)]   += alpha1*(*v);
1052       y[6*(*idx)+1] += alpha2*(*v);
1053       y[6*(*idx)+2] += alpha3*(*v);
1054       y[6*(*idx)+3] += alpha4*(*v);
1055       y[6*(*idx)+4] += alpha5*(*v);
1056       y[6*(*idx)+5] += alpha6*(*v);
1057       idx++; v++;
1058     }
1059   }
1060   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1061   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1062   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 /* ------------------------------------------------------------------------------*/
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1069 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1070 {
1071   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1072   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1073   const PetscScalar *x,*v;
1074   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1075   PetscErrorCode    ierr;
1076   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1077   PetscInt          nonzerorow=0,n,i,jrow,j;
1078 
1079   PetscFunctionBegin;
1080   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1081   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1082   idx  = a->j;
1083   v    = a->a;
1084   ii   = a->i;
1085 
1086   for (i=0; i<m; i++) {
1087     jrow  = ii[i];
1088     n     = ii[i+1] - jrow;
1089     sum1  = 0.0;
1090     sum2  = 0.0;
1091     sum3  = 0.0;
1092     sum4  = 0.0;
1093     sum5  = 0.0;
1094     sum6  = 0.0;
1095     sum7  = 0.0;
1096 
1097     nonzerorow += (n>0);
1098     for (j=0; j<n; j++) {
1099       sum1 += v[jrow]*x[7*idx[jrow]];
1100       sum2 += v[jrow]*x[7*idx[jrow]+1];
1101       sum3 += v[jrow]*x[7*idx[jrow]+2];
1102       sum4 += v[jrow]*x[7*idx[jrow]+3];
1103       sum5 += v[jrow]*x[7*idx[jrow]+4];
1104       sum6 += v[jrow]*x[7*idx[jrow]+5];
1105       sum7 += v[jrow]*x[7*idx[jrow]+6];
1106       jrow++;
1107     }
1108     y[7*i]   = sum1;
1109     y[7*i+1] = sum2;
1110     y[7*i+2] = sum3;
1111     y[7*i+3] = sum4;
1112     y[7*i+4] = sum5;
1113     y[7*i+5] = sum6;
1114     y[7*i+6] = sum7;
1115   }
1116 
1117   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1118   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1119   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1125 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1128   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1129   const PetscScalar *x,*v;
1130   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1131   PetscErrorCode    ierr;
1132   const PetscInt    m = b->AIJ->rmap->n,*idx;
1133   PetscInt          n,i;
1134 
1135   PetscFunctionBegin;
1136   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1137   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1138   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1139 
1140   for (i=0; i<m; i++) {
1141     idx    = a->j + a->i[i];
1142     v      = a->a + a->i[i];
1143     n      = a->i[i+1] - a->i[i];
1144     alpha1 = x[7*i];
1145     alpha2 = x[7*i+1];
1146     alpha3 = x[7*i+2];
1147     alpha4 = x[7*i+3];
1148     alpha5 = x[7*i+4];
1149     alpha6 = x[7*i+5];
1150     alpha7 = x[7*i+6];
1151     while (n-->0) {
1152       y[7*(*idx)]   += alpha1*(*v);
1153       y[7*(*idx)+1] += alpha2*(*v);
1154       y[7*(*idx)+2] += alpha3*(*v);
1155       y[7*(*idx)+3] += alpha4*(*v);
1156       y[7*(*idx)+4] += alpha5*(*v);
1157       y[7*(*idx)+5] += alpha6*(*v);
1158       y[7*(*idx)+6] += alpha7*(*v);
1159       idx++; v++;
1160     }
1161   }
1162   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1163   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1164   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1170 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1171 {
1172   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1173   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1174   const PetscScalar *x,*v;
1175   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1176   PetscErrorCode    ierr;
1177   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1178   PetscInt          n,i,jrow,j;
1179 
1180   PetscFunctionBegin;
1181   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1182   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1183   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1184   idx  = a->j;
1185   v    = a->a;
1186   ii   = a->i;
1187 
1188   for (i=0; i<m; i++) {
1189     jrow = ii[i];
1190     n    = ii[i+1] - jrow;
1191     sum1 = 0.0;
1192     sum2 = 0.0;
1193     sum3 = 0.0;
1194     sum4 = 0.0;
1195     sum5 = 0.0;
1196     sum6 = 0.0;
1197     sum7 = 0.0;
1198     for (j=0; j<n; j++) {
1199       sum1 += v[jrow]*x[7*idx[jrow]];
1200       sum2 += v[jrow]*x[7*idx[jrow]+1];
1201       sum3 += v[jrow]*x[7*idx[jrow]+2];
1202       sum4 += v[jrow]*x[7*idx[jrow]+3];
1203       sum5 += v[jrow]*x[7*idx[jrow]+4];
1204       sum6 += v[jrow]*x[7*idx[jrow]+5];
1205       sum7 += v[jrow]*x[7*idx[jrow]+6];
1206       jrow++;
1207     }
1208     y[7*i]   += sum1;
1209     y[7*i+1] += sum2;
1210     y[7*i+2] += sum3;
1211     y[7*i+3] += sum4;
1212     y[7*i+4] += sum5;
1213     y[7*i+5] += sum6;
1214     y[7*i+6] += sum7;
1215   }
1216 
1217   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1218   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1219   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 #undef __FUNCT__
1224 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1225 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1226 {
1227   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1228   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1229   const PetscScalar *x,*v;
1230   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1231   PetscErrorCode    ierr;
1232   const PetscInt    m = b->AIJ->rmap->n,*idx;
1233   PetscInt          n,i;
1234 
1235   PetscFunctionBegin;
1236   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1237   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1238   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1239   for (i=0; i<m; i++) {
1240     idx    = a->j + a->i[i];
1241     v      = a->a + a->i[i];
1242     n      = a->i[i+1] - a->i[i];
1243     alpha1 = x[7*i];
1244     alpha2 = x[7*i+1];
1245     alpha3 = x[7*i+2];
1246     alpha4 = x[7*i+3];
1247     alpha5 = x[7*i+4];
1248     alpha6 = x[7*i+5];
1249     alpha7 = x[7*i+6];
1250     while (n-->0) {
1251       y[7*(*idx)]   += alpha1*(*v);
1252       y[7*(*idx)+1] += alpha2*(*v);
1253       y[7*(*idx)+2] += alpha3*(*v);
1254       y[7*(*idx)+3] += alpha4*(*v);
1255       y[7*(*idx)+4] += alpha5*(*v);
1256       y[7*(*idx)+5] += alpha6*(*v);
1257       y[7*(*idx)+6] += alpha7*(*v);
1258       idx++; v++;
1259     }
1260   }
1261   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1262   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1263   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1269 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1270 {
1271   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1272   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1273   const PetscScalar *x,*v;
1274   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1275   PetscErrorCode    ierr;
1276   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1277   PetscInt          nonzerorow=0,n,i,jrow,j;
1278 
1279   PetscFunctionBegin;
1280   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1281   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1282   idx  = a->j;
1283   v    = a->a;
1284   ii   = a->i;
1285 
1286   for (i=0; i<m; i++) {
1287     jrow  = ii[i];
1288     n     = ii[i+1] - jrow;
1289     sum1  = 0.0;
1290     sum2  = 0.0;
1291     sum3  = 0.0;
1292     sum4  = 0.0;
1293     sum5  = 0.0;
1294     sum6  = 0.0;
1295     sum7  = 0.0;
1296     sum8  = 0.0;
1297 
1298     nonzerorow += (n>0);
1299     for (j=0; j<n; j++) {
1300       sum1 += v[jrow]*x[8*idx[jrow]];
1301       sum2 += v[jrow]*x[8*idx[jrow]+1];
1302       sum3 += v[jrow]*x[8*idx[jrow]+2];
1303       sum4 += v[jrow]*x[8*idx[jrow]+3];
1304       sum5 += v[jrow]*x[8*idx[jrow]+4];
1305       sum6 += v[jrow]*x[8*idx[jrow]+5];
1306       sum7 += v[jrow]*x[8*idx[jrow]+6];
1307       sum8 += v[jrow]*x[8*idx[jrow]+7];
1308       jrow++;
1309     }
1310     y[8*i]   = sum1;
1311     y[8*i+1] = sum2;
1312     y[8*i+2] = sum3;
1313     y[8*i+3] = sum4;
1314     y[8*i+4] = sum5;
1315     y[8*i+5] = sum6;
1316     y[8*i+6] = sum7;
1317     y[8*i+7] = sum8;
1318   }
1319 
1320   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1321   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1322   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1328 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1329 {
1330   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1331   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1332   const PetscScalar *x,*v;
1333   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1334   PetscErrorCode    ierr;
1335   const PetscInt    m = b->AIJ->rmap->n,*idx;
1336   PetscInt          n,i;
1337 
1338   PetscFunctionBegin;
1339   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1340   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1341   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1342 
1343   for (i=0; i<m; i++) {
1344     idx    = a->j + a->i[i];
1345     v      = a->a + a->i[i];
1346     n      = a->i[i+1] - a->i[i];
1347     alpha1 = x[8*i];
1348     alpha2 = x[8*i+1];
1349     alpha3 = x[8*i+2];
1350     alpha4 = x[8*i+3];
1351     alpha5 = x[8*i+4];
1352     alpha6 = x[8*i+5];
1353     alpha7 = x[8*i+6];
1354     alpha8 = x[8*i+7];
1355     while (n-->0) {
1356       y[8*(*idx)]   += alpha1*(*v);
1357       y[8*(*idx)+1] += alpha2*(*v);
1358       y[8*(*idx)+2] += alpha3*(*v);
1359       y[8*(*idx)+3] += alpha4*(*v);
1360       y[8*(*idx)+4] += alpha5*(*v);
1361       y[8*(*idx)+5] += alpha6*(*v);
1362       y[8*(*idx)+6] += alpha7*(*v);
1363       y[8*(*idx)+7] += alpha8*(*v);
1364       idx++; v++;
1365     }
1366   }
1367   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1368   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1369   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1375 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1376 {
1377   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1378   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1379   const PetscScalar *x,*v;
1380   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1381   PetscErrorCode    ierr;
1382   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1383   PetscInt          n,i,jrow,j;
1384 
1385   PetscFunctionBegin;
1386   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1387   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1388   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1389   idx  = a->j;
1390   v    = a->a;
1391   ii   = a->i;
1392 
1393   for (i=0; i<m; i++) {
1394     jrow = ii[i];
1395     n    = ii[i+1] - jrow;
1396     sum1 = 0.0;
1397     sum2 = 0.0;
1398     sum3 = 0.0;
1399     sum4 = 0.0;
1400     sum5 = 0.0;
1401     sum6 = 0.0;
1402     sum7 = 0.0;
1403     sum8 = 0.0;
1404     for (j=0; j<n; j++) {
1405       sum1 += v[jrow]*x[8*idx[jrow]];
1406       sum2 += v[jrow]*x[8*idx[jrow]+1];
1407       sum3 += v[jrow]*x[8*idx[jrow]+2];
1408       sum4 += v[jrow]*x[8*idx[jrow]+3];
1409       sum5 += v[jrow]*x[8*idx[jrow]+4];
1410       sum6 += v[jrow]*x[8*idx[jrow]+5];
1411       sum7 += v[jrow]*x[8*idx[jrow]+6];
1412       sum8 += v[jrow]*x[8*idx[jrow]+7];
1413       jrow++;
1414     }
1415     y[8*i]   += sum1;
1416     y[8*i+1] += sum2;
1417     y[8*i+2] += sum3;
1418     y[8*i+3] += sum4;
1419     y[8*i+4] += sum5;
1420     y[8*i+5] += sum6;
1421     y[8*i+6] += sum7;
1422     y[8*i+7] += sum8;
1423   }
1424 
1425   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1426   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1427   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1433 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1434 {
1435   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1436   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1437   const PetscScalar *x,*v;
1438   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1439   PetscErrorCode    ierr;
1440   const PetscInt    m = b->AIJ->rmap->n,*idx;
1441   PetscInt          n,i;
1442 
1443   PetscFunctionBegin;
1444   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1445   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1446   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1447   for (i=0; i<m; i++) {
1448     idx    = a->j + a->i[i];
1449     v      = a->a + a->i[i];
1450     n      = a->i[i+1] - a->i[i];
1451     alpha1 = x[8*i];
1452     alpha2 = x[8*i+1];
1453     alpha3 = x[8*i+2];
1454     alpha4 = x[8*i+3];
1455     alpha5 = x[8*i+4];
1456     alpha6 = x[8*i+5];
1457     alpha7 = x[8*i+6];
1458     alpha8 = x[8*i+7];
1459     while (n-->0) {
1460       y[8*(*idx)]   += alpha1*(*v);
1461       y[8*(*idx)+1] += alpha2*(*v);
1462       y[8*(*idx)+2] += alpha3*(*v);
1463       y[8*(*idx)+3] += alpha4*(*v);
1464       y[8*(*idx)+4] += alpha5*(*v);
1465       y[8*(*idx)+5] += alpha6*(*v);
1466       y[8*(*idx)+6] += alpha7*(*v);
1467       y[8*(*idx)+7] += alpha8*(*v);
1468       idx++; v++;
1469     }
1470   }
1471   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1472   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1473   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 /* ------------------------------------------------------------------------------*/
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1480 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1481 {
1482   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1483   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1484   const PetscScalar *x,*v;
1485   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1486   PetscErrorCode    ierr;
1487   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1488   PetscInt          nonzerorow=0,n,i,jrow,j;
1489 
1490   PetscFunctionBegin;
1491   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1492   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1493   idx  = a->j;
1494   v    = a->a;
1495   ii   = a->i;
1496 
1497   for (i=0; i<m; i++) {
1498     jrow  = ii[i];
1499     n     = ii[i+1] - jrow;
1500     sum1  = 0.0;
1501     sum2  = 0.0;
1502     sum3  = 0.0;
1503     sum4  = 0.0;
1504     sum5  = 0.0;
1505     sum6  = 0.0;
1506     sum7  = 0.0;
1507     sum8  = 0.0;
1508     sum9  = 0.0;
1509 
1510     nonzerorow += (n>0);
1511     for (j=0; j<n; j++) {
1512       sum1 += v[jrow]*x[9*idx[jrow]];
1513       sum2 += v[jrow]*x[9*idx[jrow]+1];
1514       sum3 += v[jrow]*x[9*idx[jrow]+2];
1515       sum4 += v[jrow]*x[9*idx[jrow]+3];
1516       sum5 += v[jrow]*x[9*idx[jrow]+4];
1517       sum6 += v[jrow]*x[9*idx[jrow]+5];
1518       sum7 += v[jrow]*x[9*idx[jrow]+6];
1519       sum8 += v[jrow]*x[9*idx[jrow]+7];
1520       sum9 += v[jrow]*x[9*idx[jrow]+8];
1521       jrow++;
1522     }
1523     y[9*i]   = sum1;
1524     y[9*i+1] = sum2;
1525     y[9*i+2] = sum3;
1526     y[9*i+3] = sum4;
1527     y[9*i+4] = sum5;
1528     y[9*i+5] = sum6;
1529     y[9*i+6] = sum7;
1530     y[9*i+7] = sum8;
1531     y[9*i+8] = sum9;
1532   }
1533 
1534   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1535   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1536   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 /* ------------------------------------------------------------------------------*/
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1544 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1545 {
1546   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1547   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1548   const PetscScalar *x,*v;
1549   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1550   PetscErrorCode    ierr;
1551   const PetscInt    m = b->AIJ->rmap->n,*idx;
1552   PetscInt          n,i;
1553 
1554   PetscFunctionBegin;
1555   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1556   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1557   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1558 
1559   for (i=0; i<m; i++) {
1560     idx    = a->j + a->i[i];
1561     v      = a->a + a->i[i];
1562     n      = a->i[i+1] - a->i[i];
1563     alpha1 = x[9*i];
1564     alpha2 = x[9*i+1];
1565     alpha3 = x[9*i+2];
1566     alpha4 = x[9*i+3];
1567     alpha5 = x[9*i+4];
1568     alpha6 = x[9*i+5];
1569     alpha7 = x[9*i+6];
1570     alpha8 = x[9*i+7];
1571     alpha9 = x[9*i+8];
1572     while (n-->0) {
1573       y[9*(*idx)]   += alpha1*(*v);
1574       y[9*(*idx)+1] += alpha2*(*v);
1575       y[9*(*idx)+2] += alpha3*(*v);
1576       y[9*(*idx)+3] += alpha4*(*v);
1577       y[9*(*idx)+4] += alpha5*(*v);
1578       y[9*(*idx)+5] += alpha6*(*v);
1579       y[9*(*idx)+6] += alpha7*(*v);
1580       y[9*(*idx)+7] += alpha8*(*v);
1581       y[9*(*idx)+8] += alpha9*(*v);
1582       idx++; v++;
1583     }
1584   }
1585   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1586   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1587   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1593 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1594 {
1595   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1596   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1597   const PetscScalar *x,*v;
1598   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1599   PetscErrorCode    ierr;
1600   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1601   PetscInt          n,i,jrow,j;
1602 
1603   PetscFunctionBegin;
1604   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1605   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1606   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1607   idx  = a->j;
1608   v    = a->a;
1609   ii   = a->i;
1610 
1611   for (i=0; i<m; i++) {
1612     jrow = ii[i];
1613     n    = ii[i+1] - jrow;
1614     sum1 = 0.0;
1615     sum2 = 0.0;
1616     sum3 = 0.0;
1617     sum4 = 0.0;
1618     sum5 = 0.0;
1619     sum6 = 0.0;
1620     sum7 = 0.0;
1621     sum8 = 0.0;
1622     sum9 = 0.0;
1623     for (j=0; j<n; j++) {
1624       sum1 += v[jrow]*x[9*idx[jrow]];
1625       sum2 += v[jrow]*x[9*idx[jrow]+1];
1626       sum3 += v[jrow]*x[9*idx[jrow]+2];
1627       sum4 += v[jrow]*x[9*idx[jrow]+3];
1628       sum5 += v[jrow]*x[9*idx[jrow]+4];
1629       sum6 += v[jrow]*x[9*idx[jrow]+5];
1630       sum7 += v[jrow]*x[9*idx[jrow]+6];
1631       sum8 += v[jrow]*x[9*idx[jrow]+7];
1632       sum9 += v[jrow]*x[9*idx[jrow]+8];
1633       jrow++;
1634     }
1635     y[9*i]   += sum1;
1636     y[9*i+1] += sum2;
1637     y[9*i+2] += sum3;
1638     y[9*i+3] += sum4;
1639     y[9*i+4] += sum5;
1640     y[9*i+5] += sum6;
1641     y[9*i+6] += sum7;
1642     y[9*i+7] += sum8;
1643     y[9*i+8] += sum9;
1644   }
1645 
1646   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1647   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1648   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1654 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1655 {
1656   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1657   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1658   const PetscScalar *x,*v;
1659   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1660   PetscErrorCode    ierr;
1661   const PetscInt    m = b->AIJ->rmap->n,*idx;
1662   PetscInt          n,i;
1663 
1664   PetscFunctionBegin;
1665   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1666   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1667   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1668   for (i=0; i<m; i++) {
1669     idx    = a->j + a->i[i];
1670     v      = a->a + a->i[i];
1671     n      = a->i[i+1] - a->i[i];
1672     alpha1 = x[9*i];
1673     alpha2 = x[9*i+1];
1674     alpha3 = x[9*i+2];
1675     alpha4 = x[9*i+3];
1676     alpha5 = x[9*i+4];
1677     alpha6 = x[9*i+5];
1678     alpha7 = x[9*i+6];
1679     alpha8 = x[9*i+7];
1680     alpha9 = x[9*i+8];
1681     while (n-->0) {
1682       y[9*(*idx)]   += alpha1*(*v);
1683       y[9*(*idx)+1] += alpha2*(*v);
1684       y[9*(*idx)+2] += alpha3*(*v);
1685       y[9*(*idx)+3] += alpha4*(*v);
1686       y[9*(*idx)+4] += alpha5*(*v);
1687       y[9*(*idx)+5] += alpha6*(*v);
1688       y[9*(*idx)+6] += alpha7*(*v);
1689       y[9*(*idx)+7] += alpha8*(*v);
1690       y[9*(*idx)+8] += alpha9*(*v);
1691       idx++; v++;
1692     }
1693   }
1694   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1695   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1696   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1701 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1702 {
1703   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1704   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1705   const PetscScalar *x,*v;
1706   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1707   PetscErrorCode    ierr;
1708   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1709   PetscInt          nonzerorow=0,n,i,jrow,j;
1710 
1711   PetscFunctionBegin;
1712   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1713   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1714   idx  = a->j;
1715   v    = a->a;
1716   ii   = a->i;
1717 
1718   for (i=0; i<m; i++) {
1719     jrow  = ii[i];
1720     n     = ii[i+1] - jrow;
1721     sum1  = 0.0;
1722     sum2  = 0.0;
1723     sum3  = 0.0;
1724     sum4  = 0.0;
1725     sum5  = 0.0;
1726     sum6  = 0.0;
1727     sum7  = 0.0;
1728     sum8  = 0.0;
1729     sum9  = 0.0;
1730     sum10 = 0.0;
1731 
1732     nonzerorow += (n>0);
1733     for (j=0; j<n; j++) {
1734       sum1  += v[jrow]*x[10*idx[jrow]];
1735       sum2  += v[jrow]*x[10*idx[jrow]+1];
1736       sum3  += v[jrow]*x[10*idx[jrow]+2];
1737       sum4  += v[jrow]*x[10*idx[jrow]+3];
1738       sum5  += v[jrow]*x[10*idx[jrow]+4];
1739       sum6  += v[jrow]*x[10*idx[jrow]+5];
1740       sum7  += v[jrow]*x[10*idx[jrow]+6];
1741       sum8  += v[jrow]*x[10*idx[jrow]+7];
1742       sum9  += v[jrow]*x[10*idx[jrow]+8];
1743       sum10 += v[jrow]*x[10*idx[jrow]+9];
1744       jrow++;
1745     }
1746     y[10*i]   = sum1;
1747     y[10*i+1] = sum2;
1748     y[10*i+2] = sum3;
1749     y[10*i+3] = sum4;
1750     y[10*i+4] = sum5;
1751     y[10*i+5] = sum6;
1752     y[10*i+6] = sum7;
1753     y[10*i+7] = sum8;
1754     y[10*i+8] = sum9;
1755     y[10*i+9] = sum10;
1756   }
1757 
1758   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1759   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1760   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1766 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1767 {
1768   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1769   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1770   const PetscScalar *x,*v;
1771   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1772   PetscErrorCode    ierr;
1773   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1774   PetscInt          n,i,jrow,j;
1775 
1776   PetscFunctionBegin;
1777   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1778   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1779   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1780   idx  = a->j;
1781   v    = a->a;
1782   ii   = a->i;
1783 
1784   for (i=0; i<m; i++) {
1785     jrow  = ii[i];
1786     n     = ii[i+1] - jrow;
1787     sum1  = 0.0;
1788     sum2  = 0.0;
1789     sum3  = 0.0;
1790     sum4  = 0.0;
1791     sum5  = 0.0;
1792     sum6  = 0.0;
1793     sum7  = 0.0;
1794     sum8  = 0.0;
1795     sum9  = 0.0;
1796     sum10 = 0.0;
1797     for (j=0; j<n; j++) {
1798       sum1  += v[jrow]*x[10*idx[jrow]];
1799       sum2  += v[jrow]*x[10*idx[jrow]+1];
1800       sum3  += v[jrow]*x[10*idx[jrow]+2];
1801       sum4  += v[jrow]*x[10*idx[jrow]+3];
1802       sum5  += v[jrow]*x[10*idx[jrow]+4];
1803       sum6  += v[jrow]*x[10*idx[jrow]+5];
1804       sum7  += v[jrow]*x[10*idx[jrow]+6];
1805       sum8  += v[jrow]*x[10*idx[jrow]+7];
1806       sum9  += v[jrow]*x[10*idx[jrow]+8];
1807       sum10 += v[jrow]*x[10*idx[jrow]+9];
1808       jrow++;
1809     }
1810     y[10*i]   += sum1;
1811     y[10*i+1] += sum2;
1812     y[10*i+2] += sum3;
1813     y[10*i+3] += sum4;
1814     y[10*i+4] += sum5;
1815     y[10*i+5] += sum6;
1816     y[10*i+6] += sum7;
1817     y[10*i+7] += sum8;
1818     y[10*i+8] += sum9;
1819     y[10*i+9] += sum10;
1820   }
1821 
1822   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1823   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1824   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1830 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1831 {
1832   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1833   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1834   const PetscScalar *x,*v;
1835   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1836   PetscErrorCode    ierr;
1837   const PetscInt    m = b->AIJ->rmap->n,*idx;
1838   PetscInt          n,i;
1839 
1840   PetscFunctionBegin;
1841   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1842   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1843   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1844 
1845   for (i=0; i<m; i++) {
1846     idx     = a->j + a->i[i];
1847     v       = a->a + a->i[i];
1848     n       = a->i[i+1] - a->i[i];
1849     alpha1  = x[10*i];
1850     alpha2  = x[10*i+1];
1851     alpha3  = x[10*i+2];
1852     alpha4  = x[10*i+3];
1853     alpha5  = x[10*i+4];
1854     alpha6  = x[10*i+5];
1855     alpha7  = x[10*i+6];
1856     alpha8  = x[10*i+7];
1857     alpha9  = x[10*i+8];
1858     alpha10 = x[10*i+9];
1859     while (n-->0) {
1860       y[10*(*idx)]   += alpha1*(*v);
1861       y[10*(*idx)+1] += alpha2*(*v);
1862       y[10*(*idx)+2] += alpha3*(*v);
1863       y[10*(*idx)+3] += alpha4*(*v);
1864       y[10*(*idx)+4] += alpha5*(*v);
1865       y[10*(*idx)+5] += alpha6*(*v);
1866       y[10*(*idx)+6] += alpha7*(*v);
1867       y[10*(*idx)+7] += alpha8*(*v);
1868       y[10*(*idx)+8] += alpha9*(*v);
1869       y[10*(*idx)+9] += alpha10*(*v);
1870       idx++; v++;
1871     }
1872   }
1873   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1874   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1875   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1881 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1882 {
1883   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1884   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1885   const PetscScalar *x,*v;
1886   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1887   PetscErrorCode    ierr;
1888   const PetscInt    m = b->AIJ->rmap->n,*idx;
1889   PetscInt          n,i;
1890 
1891   PetscFunctionBegin;
1892   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1893   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1894   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1895   for (i=0; i<m; i++) {
1896     idx     = a->j + a->i[i];
1897     v       = a->a + a->i[i];
1898     n       = a->i[i+1] - a->i[i];
1899     alpha1  = x[10*i];
1900     alpha2  = x[10*i+1];
1901     alpha3  = x[10*i+2];
1902     alpha4  = x[10*i+3];
1903     alpha5  = x[10*i+4];
1904     alpha6  = x[10*i+5];
1905     alpha7  = x[10*i+6];
1906     alpha8  = x[10*i+7];
1907     alpha9  = x[10*i+8];
1908     alpha10 = x[10*i+9];
1909     while (n-->0) {
1910       y[10*(*idx)]   += alpha1*(*v);
1911       y[10*(*idx)+1] += alpha2*(*v);
1912       y[10*(*idx)+2] += alpha3*(*v);
1913       y[10*(*idx)+3] += alpha4*(*v);
1914       y[10*(*idx)+4] += alpha5*(*v);
1915       y[10*(*idx)+5] += alpha6*(*v);
1916       y[10*(*idx)+6] += alpha7*(*v);
1917       y[10*(*idx)+7] += alpha8*(*v);
1918       y[10*(*idx)+8] += alpha9*(*v);
1919       y[10*(*idx)+9] += alpha10*(*v);
1920       idx++; v++;
1921     }
1922   }
1923   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1924   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1925   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 
1930 /*--------------------------------------------------------------------------------------------*/
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatMult_SeqMAIJ_11"
1933 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1934 {
1935   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1936   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1937   const PetscScalar *x,*v;
1938   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1939   PetscErrorCode    ierr;
1940   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1941   PetscInt          nonzerorow=0,n,i,jrow,j;
1942 
1943   PetscFunctionBegin;
1944   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1945   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1946   idx  = a->j;
1947   v    = a->a;
1948   ii   = a->i;
1949 
1950   for (i=0; i<m; i++) {
1951     jrow  = ii[i];
1952     n     = ii[i+1] - jrow;
1953     sum1  = 0.0;
1954     sum2  = 0.0;
1955     sum3  = 0.0;
1956     sum4  = 0.0;
1957     sum5  = 0.0;
1958     sum6  = 0.0;
1959     sum7  = 0.0;
1960     sum8  = 0.0;
1961     sum9  = 0.0;
1962     sum10 = 0.0;
1963     sum11 = 0.0;
1964 
1965     nonzerorow += (n>0);
1966     for (j=0; j<n; j++) {
1967       sum1  += v[jrow]*x[11*idx[jrow]];
1968       sum2  += v[jrow]*x[11*idx[jrow]+1];
1969       sum3  += v[jrow]*x[11*idx[jrow]+2];
1970       sum4  += v[jrow]*x[11*idx[jrow]+3];
1971       sum5  += v[jrow]*x[11*idx[jrow]+4];
1972       sum6  += v[jrow]*x[11*idx[jrow]+5];
1973       sum7  += v[jrow]*x[11*idx[jrow]+6];
1974       sum8  += v[jrow]*x[11*idx[jrow]+7];
1975       sum9  += v[jrow]*x[11*idx[jrow]+8];
1976       sum10 += v[jrow]*x[11*idx[jrow]+9];
1977       sum11 += v[jrow]*x[11*idx[jrow]+10];
1978       jrow++;
1979     }
1980     y[11*i]    = sum1;
1981     y[11*i+1]  = sum2;
1982     y[11*i+2]  = sum3;
1983     y[11*i+3]  = sum4;
1984     y[11*i+4]  = sum5;
1985     y[11*i+5]  = sum6;
1986     y[11*i+6]  = sum7;
1987     y[11*i+7]  = sum8;
1988     y[11*i+8]  = sum9;
1989     y[11*i+9]  = sum10;
1990     y[11*i+10] = sum11;
1991   }
1992 
1993   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1994   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1995   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
2001 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2002 {
2003   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2004   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2005   const PetscScalar *x,*v;
2006   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2007   PetscErrorCode    ierr;
2008   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2009   PetscInt          n,i,jrow,j;
2010 
2011   PetscFunctionBegin;
2012   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2013   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2014   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2015   idx  = a->j;
2016   v    = a->a;
2017   ii   = a->i;
2018 
2019   for (i=0; i<m; i++) {
2020     jrow  = ii[i];
2021     n     = ii[i+1] - jrow;
2022     sum1  = 0.0;
2023     sum2  = 0.0;
2024     sum3  = 0.0;
2025     sum4  = 0.0;
2026     sum5  = 0.0;
2027     sum6  = 0.0;
2028     sum7  = 0.0;
2029     sum8  = 0.0;
2030     sum9  = 0.0;
2031     sum10 = 0.0;
2032     sum11 = 0.0;
2033     for (j=0; j<n; j++) {
2034       sum1  += v[jrow]*x[11*idx[jrow]];
2035       sum2  += v[jrow]*x[11*idx[jrow]+1];
2036       sum3  += v[jrow]*x[11*idx[jrow]+2];
2037       sum4  += v[jrow]*x[11*idx[jrow]+3];
2038       sum5  += v[jrow]*x[11*idx[jrow]+4];
2039       sum6  += v[jrow]*x[11*idx[jrow]+5];
2040       sum7  += v[jrow]*x[11*idx[jrow]+6];
2041       sum8  += v[jrow]*x[11*idx[jrow]+7];
2042       sum9  += v[jrow]*x[11*idx[jrow]+8];
2043       sum10 += v[jrow]*x[11*idx[jrow]+9];
2044       sum11 += v[jrow]*x[11*idx[jrow]+10];
2045       jrow++;
2046     }
2047     y[11*i]    += sum1;
2048     y[11*i+1]  += sum2;
2049     y[11*i+2]  += sum3;
2050     y[11*i+3]  += sum4;
2051     y[11*i+4]  += sum5;
2052     y[11*i+5]  += sum6;
2053     y[11*i+6]  += sum7;
2054     y[11*i+7]  += sum8;
2055     y[11*i+8]  += sum9;
2056     y[11*i+9]  += sum10;
2057     y[11*i+10] += sum11;
2058   }
2059 
2060   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2061   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2062   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2063   PetscFunctionReturn(0);
2064 }
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2068 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2069 {
2070   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2071   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2072   const PetscScalar *x,*v;
2073   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2074   PetscErrorCode    ierr;
2075   const PetscInt    m = b->AIJ->rmap->n,*idx;
2076   PetscInt          n,i;
2077 
2078   PetscFunctionBegin;
2079   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2080   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2081   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2082 
2083   for (i=0; i<m; i++) {
2084     idx     = a->j + a->i[i];
2085     v       = a->a + a->i[i];
2086     n       = a->i[i+1] - a->i[i];
2087     alpha1  = x[11*i];
2088     alpha2  = x[11*i+1];
2089     alpha3  = x[11*i+2];
2090     alpha4  = x[11*i+3];
2091     alpha5  = x[11*i+4];
2092     alpha6  = x[11*i+5];
2093     alpha7  = x[11*i+6];
2094     alpha8  = x[11*i+7];
2095     alpha9  = x[11*i+8];
2096     alpha10 = x[11*i+9];
2097     alpha11 = x[11*i+10];
2098     while (n-->0) {
2099       y[11*(*idx)]    += alpha1*(*v);
2100       y[11*(*idx)+1]  += alpha2*(*v);
2101       y[11*(*idx)+2]  += alpha3*(*v);
2102       y[11*(*idx)+3]  += alpha4*(*v);
2103       y[11*(*idx)+4]  += alpha5*(*v);
2104       y[11*(*idx)+5]  += alpha6*(*v);
2105       y[11*(*idx)+6]  += alpha7*(*v);
2106       y[11*(*idx)+7]  += alpha8*(*v);
2107       y[11*(*idx)+8]  += alpha9*(*v);
2108       y[11*(*idx)+9]  += alpha10*(*v);
2109       y[11*(*idx)+10] += alpha11*(*v);
2110       idx++; v++;
2111     }
2112   }
2113   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2114   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2115   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2121 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2122 {
2123   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2124   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2125   const PetscScalar *x,*v;
2126   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2127   PetscErrorCode    ierr;
2128   const PetscInt    m = b->AIJ->rmap->n,*idx;
2129   PetscInt          n,i;
2130 
2131   PetscFunctionBegin;
2132   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2133   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2134   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2135   for (i=0; i<m; i++) {
2136     idx     = a->j + a->i[i];
2137     v       = a->a + a->i[i];
2138     n       = a->i[i+1] - a->i[i];
2139     alpha1  = x[11*i];
2140     alpha2  = x[11*i+1];
2141     alpha3  = x[11*i+2];
2142     alpha4  = x[11*i+3];
2143     alpha5  = x[11*i+4];
2144     alpha6  = x[11*i+5];
2145     alpha7  = x[11*i+6];
2146     alpha8  = x[11*i+7];
2147     alpha9  = x[11*i+8];
2148     alpha10 = x[11*i+9];
2149     alpha11 = x[11*i+10];
2150     while (n-->0) {
2151       y[11*(*idx)]    += alpha1*(*v);
2152       y[11*(*idx)+1]  += alpha2*(*v);
2153       y[11*(*idx)+2]  += alpha3*(*v);
2154       y[11*(*idx)+3]  += alpha4*(*v);
2155       y[11*(*idx)+4]  += alpha5*(*v);
2156       y[11*(*idx)+5]  += alpha6*(*v);
2157       y[11*(*idx)+6]  += alpha7*(*v);
2158       y[11*(*idx)+7]  += alpha8*(*v);
2159       y[11*(*idx)+8]  += alpha9*(*v);
2160       y[11*(*idx)+9]  += alpha10*(*v);
2161       y[11*(*idx)+10] += alpha11*(*v);
2162       idx++; v++;
2163     }
2164   }
2165   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2166   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2167   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 
2172 /*--------------------------------------------------------------------------------------------*/
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatMult_SeqMAIJ_16"
2175 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2176 {
2177   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2178   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2179   const PetscScalar *x,*v;
2180   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2181   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2182   PetscErrorCode    ierr;
2183   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2184   PetscInt          nonzerorow=0,n,i,jrow,j;
2185 
2186   PetscFunctionBegin;
2187   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2188   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2189   idx  = a->j;
2190   v    = a->a;
2191   ii   = a->i;
2192 
2193   for (i=0; i<m; i++) {
2194     jrow  = ii[i];
2195     n     = ii[i+1] - jrow;
2196     sum1  = 0.0;
2197     sum2  = 0.0;
2198     sum3  = 0.0;
2199     sum4  = 0.0;
2200     sum5  = 0.0;
2201     sum6  = 0.0;
2202     sum7  = 0.0;
2203     sum8  = 0.0;
2204     sum9  = 0.0;
2205     sum10 = 0.0;
2206     sum11 = 0.0;
2207     sum12 = 0.0;
2208     sum13 = 0.0;
2209     sum14 = 0.0;
2210     sum15 = 0.0;
2211     sum16 = 0.0;
2212 
2213     nonzerorow += (n>0);
2214     for (j=0; j<n; j++) {
2215       sum1  += v[jrow]*x[16*idx[jrow]];
2216       sum2  += v[jrow]*x[16*idx[jrow]+1];
2217       sum3  += v[jrow]*x[16*idx[jrow]+2];
2218       sum4  += v[jrow]*x[16*idx[jrow]+3];
2219       sum5  += v[jrow]*x[16*idx[jrow]+4];
2220       sum6  += v[jrow]*x[16*idx[jrow]+5];
2221       sum7  += v[jrow]*x[16*idx[jrow]+6];
2222       sum8  += v[jrow]*x[16*idx[jrow]+7];
2223       sum9  += v[jrow]*x[16*idx[jrow]+8];
2224       sum10 += v[jrow]*x[16*idx[jrow]+9];
2225       sum11 += v[jrow]*x[16*idx[jrow]+10];
2226       sum12 += v[jrow]*x[16*idx[jrow]+11];
2227       sum13 += v[jrow]*x[16*idx[jrow]+12];
2228       sum14 += v[jrow]*x[16*idx[jrow]+13];
2229       sum15 += v[jrow]*x[16*idx[jrow]+14];
2230       sum16 += v[jrow]*x[16*idx[jrow]+15];
2231       jrow++;
2232     }
2233     y[16*i]    = sum1;
2234     y[16*i+1]  = sum2;
2235     y[16*i+2]  = sum3;
2236     y[16*i+3]  = sum4;
2237     y[16*i+4]  = sum5;
2238     y[16*i+5]  = sum6;
2239     y[16*i+6]  = sum7;
2240     y[16*i+7]  = sum8;
2241     y[16*i+8]  = sum9;
2242     y[16*i+9]  = sum10;
2243     y[16*i+10] = sum11;
2244     y[16*i+11] = sum12;
2245     y[16*i+12] = sum13;
2246     y[16*i+13] = sum14;
2247     y[16*i+14] = sum15;
2248     y[16*i+15] = sum16;
2249   }
2250 
2251   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2252   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2253   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2259 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2260 {
2261   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2262   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2263   const PetscScalar *x,*v;
2264   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2265   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2266   PetscErrorCode    ierr;
2267   const PetscInt    m = b->AIJ->rmap->n,*idx;
2268   PetscInt          n,i;
2269 
2270   PetscFunctionBegin;
2271   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2272   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2273   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2274 
2275   for (i=0; i<m; i++) {
2276     idx     = a->j + a->i[i];
2277     v       = a->a + a->i[i];
2278     n       = a->i[i+1] - a->i[i];
2279     alpha1  = x[16*i];
2280     alpha2  = x[16*i+1];
2281     alpha3  = x[16*i+2];
2282     alpha4  = x[16*i+3];
2283     alpha5  = x[16*i+4];
2284     alpha6  = x[16*i+5];
2285     alpha7  = x[16*i+6];
2286     alpha8  = x[16*i+7];
2287     alpha9  = x[16*i+8];
2288     alpha10 = x[16*i+9];
2289     alpha11 = x[16*i+10];
2290     alpha12 = x[16*i+11];
2291     alpha13 = x[16*i+12];
2292     alpha14 = x[16*i+13];
2293     alpha15 = x[16*i+14];
2294     alpha16 = x[16*i+15];
2295     while (n-->0) {
2296       y[16*(*idx)]    += alpha1*(*v);
2297       y[16*(*idx)+1]  += alpha2*(*v);
2298       y[16*(*idx)+2]  += alpha3*(*v);
2299       y[16*(*idx)+3]  += alpha4*(*v);
2300       y[16*(*idx)+4]  += alpha5*(*v);
2301       y[16*(*idx)+5]  += alpha6*(*v);
2302       y[16*(*idx)+6]  += alpha7*(*v);
2303       y[16*(*idx)+7]  += alpha8*(*v);
2304       y[16*(*idx)+8]  += alpha9*(*v);
2305       y[16*(*idx)+9]  += alpha10*(*v);
2306       y[16*(*idx)+10] += alpha11*(*v);
2307       y[16*(*idx)+11] += alpha12*(*v);
2308       y[16*(*idx)+12] += alpha13*(*v);
2309       y[16*(*idx)+13] += alpha14*(*v);
2310       y[16*(*idx)+14] += alpha15*(*v);
2311       y[16*(*idx)+15] += alpha16*(*v);
2312       idx++; v++;
2313     }
2314   }
2315   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2316   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2317   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2323 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2324 {
2325   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2326   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2327   const PetscScalar *x,*v;
2328   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2329   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2330   PetscErrorCode    ierr;
2331   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2332   PetscInt          n,i,jrow,j;
2333 
2334   PetscFunctionBegin;
2335   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2336   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2337   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2338   idx  = a->j;
2339   v    = a->a;
2340   ii   = a->i;
2341 
2342   for (i=0; i<m; i++) {
2343     jrow  = ii[i];
2344     n     = ii[i+1] - jrow;
2345     sum1  = 0.0;
2346     sum2  = 0.0;
2347     sum3  = 0.0;
2348     sum4  = 0.0;
2349     sum5  = 0.0;
2350     sum6  = 0.0;
2351     sum7  = 0.0;
2352     sum8  = 0.0;
2353     sum9  = 0.0;
2354     sum10 = 0.0;
2355     sum11 = 0.0;
2356     sum12 = 0.0;
2357     sum13 = 0.0;
2358     sum14 = 0.0;
2359     sum15 = 0.0;
2360     sum16 = 0.0;
2361     for (j=0; j<n; j++) {
2362       sum1  += v[jrow]*x[16*idx[jrow]];
2363       sum2  += v[jrow]*x[16*idx[jrow]+1];
2364       sum3  += v[jrow]*x[16*idx[jrow]+2];
2365       sum4  += v[jrow]*x[16*idx[jrow]+3];
2366       sum5  += v[jrow]*x[16*idx[jrow]+4];
2367       sum6  += v[jrow]*x[16*idx[jrow]+5];
2368       sum7  += v[jrow]*x[16*idx[jrow]+6];
2369       sum8  += v[jrow]*x[16*idx[jrow]+7];
2370       sum9  += v[jrow]*x[16*idx[jrow]+8];
2371       sum10 += v[jrow]*x[16*idx[jrow]+9];
2372       sum11 += v[jrow]*x[16*idx[jrow]+10];
2373       sum12 += v[jrow]*x[16*idx[jrow]+11];
2374       sum13 += v[jrow]*x[16*idx[jrow]+12];
2375       sum14 += v[jrow]*x[16*idx[jrow]+13];
2376       sum15 += v[jrow]*x[16*idx[jrow]+14];
2377       sum16 += v[jrow]*x[16*idx[jrow]+15];
2378       jrow++;
2379     }
2380     y[16*i]    += sum1;
2381     y[16*i+1]  += sum2;
2382     y[16*i+2]  += sum3;
2383     y[16*i+3]  += sum4;
2384     y[16*i+4]  += sum5;
2385     y[16*i+5]  += sum6;
2386     y[16*i+6]  += sum7;
2387     y[16*i+7]  += sum8;
2388     y[16*i+8]  += sum9;
2389     y[16*i+9]  += sum10;
2390     y[16*i+10] += sum11;
2391     y[16*i+11] += sum12;
2392     y[16*i+12] += sum13;
2393     y[16*i+13] += sum14;
2394     y[16*i+14] += sum15;
2395     y[16*i+15] += sum16;
2396   }
2397 
2398   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2399   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2400   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2406 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2407 {
2408   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2409   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2410   const PetscScalar *x,*v;
2411   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2412   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2413   PetscErrorCode    ierr;
2414   const PetscInt    m = b->AIJ->rmap->n,*idx;
2415   PetscInt          n,i;
2416 
2417   PetscFunctionBegin;
2418   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2419   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2420   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2421   for (i=0; i<m; i++) {
2422     idx     = a->j + a->i[i];
2423     v       = a->a + a->i[i];
2424     n       = a->i[i+1] - a->i[i];
2425     alpha1  = x[16*i];
2426     alpha2  = x[16*i+1];
2427     alpha3  = x[16*i+2];
2428     alpha4  = x[16*i+3];
2429     alpha5  = x[16*i+4];
2430     alpha6  = x[16*i+5];
2431     alpha7  = x[16*i+6];
2432     alpha8  = x[16*i+7];
2433     alpha9  = x[16*i+8];
2434     alpha10 = x[16*i+9];
2435     alpha11 = x[16*i+10];
2436     alpha12 = x[16*i+11];
2437     alpha13 = x[16*i+12];
2438     alpha14 = x[16*i+13];
2439     alpha15 = x[16*i+14];
2440     alpha16 = x[16*i+15];
2441     while (n-->0) {
2442       y[16*(*idx)]    += alpha1*(*v);
2443       y[16*(*idx)+1]  += alpha2*(*v);
2444       y[16*(*idx)+2]  += alpha3*(*v);
2445       y[16*(*idx)+3]  += alpha4*(*v);
2446       y[16*(*idx)+4]  += alpha5*(*v);
2447       y[16*(*idx)+5]  += alpha6*(*v);
2448       y[16*(*idx)+6]  += alpha7*(*v);
2449       y[16*(*idx)+7]  += alpha8*(*v);
2450       y[16*(*idx)+8]  += alpha9*(*v);
2451       y[16*(*idx)+9]  += alpha10*(*v);
2452       y[16*(*idx)+10] += alpha11*(*v);
2453       y[16*(*idx)+11] += alpha12*(*v);
2454       y[16*(*idx)+12] += alpha13*(*v);
2455       y[16*(*idx)+13] += alpha14*(*v);
2456       y[16*(*idx)+14] += alpha15*(*v);
2457       y[16*(*idx)+15] += alpha16*(*v);
2458       idx++; v++;
2459     }
2460   }
2461   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2462   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 /*--------------------------------------------------------------------------------------------*/
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2470 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2471 {
2472   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2473   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2474   const PetscScalar *x,*v;
2475   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2476   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2477   PetscErrorCode    ierr;
2478   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2479   PetscInt          nonzerorow=0,n,i,jrow,j;
2480 
2481   PetscFunctionBegin;
2482   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2483   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2484   idx  = a->j;
2485   v    = a->a;
2486   ii   = a->i;
2487 
2488   for (i=0; i<m; i++) {
2489     jrow  = ii[i];
2490     n     = ii[i+1] - jrow;
2491     sum1  = 0.0;
2492     sum2  = 0.0;
2493     sum3  = 0.0;
2494     sum4  = 0.0;
2495     sum5  = 0.0;
2496     sum6  = 0.0;
2497     sum7  = 0.0;
2498     sum8  = 0.0;
2499     sum9  = 0.0;
2500     sum10 = 0.0;
2501     sum11 = 0.0;
2502     sum12 = 0.0;
2503     sum13 = 0.0;
2504     sum14 = 0.0;
2505     sum15 = 0.0;
2506     sum16 = 0.0;
2507     sum17 = 0.0;
2508     sum18 = 0.0;
2509 
2510     nonzerorow += (n>0);
2511     for (j=0; j<n; j++) {
2512       sum1  += v[jrow]*x[18*idx[jrow]];
2513       sum2  += v[jrow]*x[18*idx[jrow]+1];
2514       sum3  += v[jrow]*x[18*idx[jrow]+2];
2515       sum4  += v[jrow]*x[18*idx[jrow]+3];
2516       sum5  += v[jrow]*x[18*idx[jrow]+4];
2517       sum6  += v[jrow]*x[18*idx[jrow]+5];
2518       sum7  += v[jrow]*x[18*idx[jrow]+6];
2519       sum8  += v[jrow]*x[18*idx[jrow]+7];
2520       sum9  += v[jrow]*x[18*idx[jrow]+8];
2521       sum10 += v[jrow]*x[18*idx[jrow]+9];
2522       sum11 += v[jrow]*x[18*idx[jrow]+10];
2523       sum12 += v[jrow]*x[18*idx[jrow]+11];
2524       sum13 += v[jrow]*x[18*idx[jrow]+12];
2525       sum14 += v[jrow]*x[18*idx[jrow]+13];
2526       sum15 += v[jrow]*x[18*idx[jrow]+14];
2527       sum16 += v[jrow]*x[18*idx[jrow]+15];
2528       sum17 += v[jrow]*x[18*idx[jrow]+16];
2529       sum18 += v[jrow]*x[18*idx[jrow]+17];
2530       jrow++;
2531     }
2532     y[18*i]    = sum1;
2533     y[18*i+1]  = sum2;
2534     y[18*i+2]  = sum3;
2535     y[18*i+3]  = sum4;
2536     y[18*i+4]  = sum5;
2537     y[18*i+5]  = sum6;
2538     y[18*i+6]  = sum7;
2539     y[18*i+7]  = sum8;
2540     y[18*i+8]  = sum9;
2541     y[18*i+9]  = sum10;
2542     y[18*i+10] = sum11;
2543     y[18*i+11] = sum12;
2544     y[18*i+12] = sum13;
2545     y[18*i+13] = sum14;
2546     y[18*i+14] = sum15;
2547     y[18*i+15] = sum16;
2548     y[18*i+16] = sum17;
2549     y[18*i+17] = sum18;
2550   }
2551 
2552   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2553   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2554   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2560 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2561 {
2562   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2563   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2564   const PetscScalar *x,*v;
2565   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2566   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2567   PetscErrorCode    ierr;
2568   const PetscInt    m = b->AIJ->rmap->n,*idx;
2569   PetscInt          n,i;
2570 
2571   PetscFunctionBegin;
2572   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2573   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2574   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2575 
2576   for (i=0; i<m; i++) {
2577     idx     = a->j + a->i[i];
2578     v       = a->a + a->i[i];
2579     n       = a->i[i+1] - a->i[i];
2580     alpha1  = x[18*i];
2581     alpha2  = x[18*i+1];
2582     alpha3  = x[18*i+2];
2583     alpha4  = x[18*i+3];
2584     alpha5  = x[18*i+4];
2585     alpha6  = x[18*i+5];
2586     alpha7  = x[18*i+6];
2587     alpha8  = x[18*i+7];
2588     alpha9  = x[18*i+8];
2589     alpha10 = x[18*i+9];
2590     alpha11 = x[18*i+10];
2591     alpha12 = x[18*i+11];
2592     alpha13 = x[18*i+12];
2593     alpha14 = x[18*i+13];
2594     alpha15 = x[18*i+14];
2595     alpha16 = x[18*i+15];
2596     alpha17 = x[18*i+16];
2597     alpha18 = x[18*i+17];
2598     while (n-->0) {
2599       y[18*(*idx)]    += alpha1*(*v);
2600       y[18*(*idx)+1]  += alpha2*(*v);
2601       y[18*(*idx)+2]  += alpha3*(*v);
2602       y[18*(*idx)+3]  += alpha4*(*v);
2603       y[18*(*idx)+4]  += alpha5*(*v);
2604       y[18*(*idx)+5]  += alpha6*(*v);
2605       y[18*(*idx)+6]  += alpha7*(*v);
2606       y[18*(*idx)+7]  += alpha8*(*v);
2607       y[18*(*idx)+8]  += alpha9*(*v);
2608       y[18*(*idx)+9]  += alpha10*(*v);
2609       y[18*(*idx)+10] += alpha11*(*v);
2610       y[18*(*idx)+11] += alpha12*(*v);
2611       y[18*(*idx)+12] += alpha13*(*v);
2612       y[18*(*idx)+13] += alpha14*(*v);
2613       y[18*(*idx)+14] += alpha15*(*v);
2614       y[18*(*idx)+15] += alpha16*(*v);
2615       y[18*(*idx)+16] += alpha17*(*v);
2616       y[18*(*idx)+17] += alpha18*(*v);
2617       idx++; v++;
2618     }
2619   }
2620   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2621   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2622   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2623   PetscFunctionReturn(0);
2624 }
2625 
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2628 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2629 {
2630   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2631   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2632   const PetscScalar *x,*v;
2633   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2634   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2635   PetscErrorCode    ierr;
2636   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2637   PetscInt          n,i,jrow,j;
2638 
2639   PetscFunctionBegin;
2640   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2641   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2642   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2643   idx  = a->j;
2644   v    = a->a;
2645   ii   = a->i;
2646 
2647   for (i=0; i<m; i++) {
2648     jrow  = ii[i];
2649     n     = ii[i+1] - jrow;
2650     sum1  = 0.0;
2651     sum2  = 0.0;
2652     sum3  = 0.0;
2653     sum4  = 0.0;
2654     sum5  = 0.0;
2655     sum6  = 0.0;
2656     sum7  = 0.0;
2657     sum8  = 0.0;
2658     sum9  = 0.0;
2659     sum10 = 0.0;
2660     sum11 = 0.0;
2661     sum12 = 0.0;
2662     sum13 = 0.0;
2663     sum14 = 0.0;
2664     sum15 = 0.0;
2665     sum16 = 0.0;
2666     sum17 = 0.0;
2667     sum18 = 0.0;
2668     for (j=0; j<n; j++) {
2669       sum1  += v[jrow]*x[18*idx[jrow]];
2670       sum2  += v[jrow]*x[18*idx[jrow]+1];
2671       sum3  += v[jrow]*x[18*idx[jrow]+2];
2672       sum4  += v[jrow]*x[18*idx[jrow]+3];
2673       sum5  += v[jrow]*x[18*idx[jrow]+4];
2674       sum6  += v[jrow]*x[18*idx[jrow]+5];
2675       sum7  += v[jrow]*x[18*idx[jrow]+6];
2676       sum8  += v[jrow]*x[18*idx[jrow]+7];
2677       sum9  += v[jrow]*x[18*idx[jrow]+8];
2678       sum10 += v[jrow]*x[18*idx[jrow]+9];
2679       sum11 += v[jrow]*x[18*idx[jrow]+10];
2680       sum12 += v[jrow]*x[18*idx[jrow]+11];
2681       sum13 += v[jrow]*x[18*idx[jrow]+12];
2682       sum14 += v[jrow]*x[18*idx[jrow]+13];
2683       sum15 += v[jrow]*x[18*idx[jrow]+14];
2684       sum16 += v[jrow]*x[18*idx[jrow]+15];
2685       sum17 += v[jrow]*x[18*idx[jrow]+16];
2686       sum18 += v[jrow]*x[18*idx[jrow]+17];
2687       jrow++;
2688     }
2689     y[18*i]    += sum1;
2690     y[18*i+1]  += sum2;
2691     y[18*i+2]  += sum3;
2692     y[18*i+3]  += sum4;
2693     y[18*i+4]  += sum5;
2694     y[18*i+5]  += sum6;
2695     y[18*i+6]  += sum7;
2696     y[18*i+7]  += sum8;
2697     y[18*i+8]  += sum9;
2698     y[18*i+9]  += sum10;
2699     y[18*i+10] += sum11;
2700     y[18*i+11] += sum12;
2701     y[18*i+12] += sum13;
2702     y[18*i+13] += sum14;
2703     y[18*i+14] += sum15;
2704     y[18*i+15] += sum16;
2705     y[18*i+16] += sum17;
2706     y[18*i+17] += sum18;
2707   }
2708 
2709   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2710   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2711   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2712   PetscFunctionReturn(0);
2713 }
2714 
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2717 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2718 {
2719   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2720   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2721   const PetscScalar *x,*v;
2722   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2723   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2724   PetscErrorCode    ierr;
2725   const PetscInt    m = b->AIJ->rmap->n,*idx;
2726   PetscInt          n,i;
2727 
2728   PetscFunctionBegin;
2729   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2730   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2731   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2732   for (i=0; i<m; i++) {
2733     idx     = a->j + a->i[i];
2734     v       = a->a + a->i[i];
2735     n       = a->i[i+1] - a->i[i];
2736     alpha1  = x[18*i];
2737     alpha2  = x[18*i+1];
2738     alpha3  = x[18*i+2];
2739     alpha4  = x[18*i+3];
2740     alpha5  = x[18*i+4];
2741     alpha6  = x[18*i+5];
2742     alpha7  = x[18*i+6];
2743     alpha8  = x[18*i+7];
2744     alpha9  = x[18*i+8];
2745     alpha10 = x[18*i+9];
2746     alpha11 = x[18*i+10];
2747     alpha12 = x[18*i+11];
2748     alpha13 = x[18*i+12];
2749     alpha14 = x[18*i+13];
2750     alpha15 = x[18*i+14];
2751     alpha16 = x[18*i+15];
2752     alpha17 = x[18*i+16];
2753     alpha18 = x[18*i+17];
2754     while (n-->0) {
2755       y[18*(*idx)]    += alpha1*(*v);
2756       y[18*(*idx)+1]  += alpha2*(*v);
2757       y[18*(*idx)+2]  += alpha3*(*v);
2758       y[18*(*idx)+3]  += alpha4*(*v);
2759       y[18*(*idx)+4]  += alpha5*(*v);
2760       y[18*(*idx)+5]  += alpha6*(*v);
2761       y[18*(*idx)+6]  += alpha7*(*v);
2762       y[18*(*idx)+7]  += alpha8*(*v);
2763       y[18*(*idx)+8]  += alpha9*(*v);
2764       y[18*(*idx)+9]  += alpha10*(*v);
2765       y[18*(*idx)+10] += alpha11*(*v);
2766       y[18*(*idx)+11] += alpha12*(*v);
2767       y[18*(*idx)+12] += alpha13*(*v);
2768       y[18*(*idx)+13] += alpha14*(*v);
2769       y[18*(*idx)+14] += alpha15*(*v);
2770       y[18*(*idx)+15] += alpha16*(*v);
2771       y[18*(*idx)+16] += alpha17*(*v);
2772       y[18*(*idx)+17] += alpha18*(*v);
2773       idx++; v++;
2774     }
2775   }
2776   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2777   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2778   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatMult_SeqMAIJ_N"
2784 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2785 {
2786   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2787   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2788   const PetscScalar *x,*v;
2789   PetscScalar       *y,*sums;
2790   PetscErrorCode    ierr;
2791   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2792   PetscInt          n,i,jrow,j,dof = b->dof,k;
2793 
2794   PetscFunctionBegin;
2795   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2796   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2797   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2798   idx  = a->j;
2799   v    = a->a;
2800   ii   = a->i;
2801 
2802   for (i=0; i<m; i++) {
2803     jrow = ii[i];
2804     n    = ii[i+1] - jrow;
2805     sums = y + dof*i;
2806     for (j=0; j<n; j++) {
2807       for (k=0; k<dof; k++) {
2808         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2809       }
2810       jrow++;
2811     }
2812   }
2813 
2814   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2815   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2816   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 #undef __FUNCT__
2821 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
2822 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2823 {
2824   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2825   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2826   const PetscScalar *x,*v;
2827   PetscScalar       *y,*sums;
2828   PetscErrorCode    ierr;
2829   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2830   PetscInt          n,i,jrow,j,dof = b->dof,k;
2831 
2832   PetscFunctionBegin;
2833   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2834   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2835   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2836   idx  = a->j;
2837   v    = a->a;
2838   ii   = a->i;
2839 
2840   for (i=0; i<m; i++) {
2841     jrow = ii[i];
2842     n    = ii[i+1] - jrow;
2843     sums = y + dof*i;
2844     for (j=0; j<n; j++) {
2845       for (k=0; k<dof; k++) {
2846         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2847       }
2848       jrow++;
2849     }
2850   }
2851 
2852   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2853   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2854   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
2857 
2858 #undef __FUNCT__
2859 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
2860 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2861 {
2862   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2863   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2864   const PetscScalar *x,*v,*alpha;
2865   PetscScalar       *y;
2866   PetscErrorCode    ierr;
2867   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2868   PetscInt          n,i,k;
2869 
2870   PetscFunctionBegin;
2871   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2872   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2873   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2874   for (i=0; i<m; i++) {
2875     idx   = a->j + a->i[i];
2876     v     = a->a + a->i[i];
2877     n     = a->i[i+1] - a->i[i];
2878     alpha = x + dof*i;
2879     while (n-->0) {
2880       for (k=0; k<dof; k++) {
2881         y[dof*(*idx)+k] += alpha[k]*(*v);
2882       }
2883       idx++; v++;
2884     }
2885   }
2886   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2887   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2888   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 #undef __FUNCT__
2893 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
2894 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2895 {
2896   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2897   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2898   const PetscScalar *x,*v,*alpha;
2899   PetscScalar       *y;
2900   PetscErrorCode    ierr;
2901   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2902   PetscInt          n,i,k;
2903 
2904   PetscFunctionBegin;
2905   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2906   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2907   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2908   for (i=0; i<m; i++) {
2909     idx   = a->j + a->i[i];
2910     v     = a->a + a->i[i];
2911     n     = a->i[i+1] - a->i[i];
2912     alpha = x + dof*i;
2913     while (n-->0) {
2914       for (k=0; k<dof; k++) {
2915         y[dof*(*idx)+k] += alpha[k]*(*v);
2916       }
2917       idx++; v++;
2918     }
2919   }
2920   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2921   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2922   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 /*===================================================================================*/
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2929 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2930 {
2931   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2932   PetscErrorCode ierr;
2933 
2934   PetscFunctionBegin;
2935   /* start the scatter */
2936   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2937   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2938   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2939   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 #undef __FUNCT__
2944 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2945 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2946 {
2947   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2948   PetscErrorCode ierr;
2949 
2950   PetscFunctionBegin;
2951   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2952   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2953   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2954   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2955   PetscFunctionReturn(0);
2956 }
2957 
2958 #undef __FUNCT__
2959 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2960 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2961 {
2962   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2963   PetscErrorCode ierr;
2964 
2965   PetscFunctionBegin;
2966   /* start the scatter */
2967   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2968   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2969   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2970   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2971   PetscFunctionReturn(0);
2972 }
2973 
2974 #undef __FUNCT__
2975 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2976 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2977 {
2978   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2979   PetscErrorCode ierr;
2980 
2981   PetscFunctionBegin;
2982   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2983   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2984   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2985   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 /* ----------------------------------------------------------------*/
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2992 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2993 {
2994   PetscErrorCode     ierr;
2995   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2996   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2997   Mat                P         =pp->AIJ;
2998   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2999   PetscInt           *pti,*ptj,*ptJ;
3000   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3001   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3002   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
3003   MatScalar          *ca;
3004   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
3005 
3006   PetscFunctionBegin;
3007   /* Get ij structure of P^T */
3008   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3009 
3010   cn = pn*ppdof;
3011   /* Allocate ci array, arrays for fill computation and */
3012   /* free space for accumulating nonzero column info */
3013   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
3014   ci[0] = 0;
3015 
3016   /* Work arrays for rows of P^T*A */
3017   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
3018   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3019   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
3020 
3021   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3022   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3023   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3024   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
3025   current_space = free_space;
3026 
3027   /* Determine symbolic info for each row of C: */
3028   for (i=0; i<pn; i++) {
3029     ptnzi = pti[i+1] - pti[i];
3030     ptJ   = ptj + pti[i];
3031     for (dof=0; dof<ppdof; dof++) {
3032       ptanzi = 0;
3033       /* Determine symbolic row of PtA: */
3034       for (j=0; j<ptnzi; j++) {
3035         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3036         arow = ptJ[j]*ppdof + dof;
3037         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3038         anzj = ai[arow+1] - ai[arow];
3039         ajj  = aj + ai[arow];
3040         for (k=0; k<anzj; k++) {
3041           if (!ptadenserow[ajj[k]]) {
3042             ptadenserow[ajj[k]]    = -1;
3043             ptasparserow[ptanzi++] = ajj[k];
3044           }
3045         }
3046       }
3047       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3048       ptaj = ptasparserow;
3049       cnzi = 0;
3050       for (j=0; j<ptanzi; j++) {
3051         /* Get offset within block of P */
3052         pshift = *ptaj%ppdof;
3053         /* Get block row of P */
3054         prow = (*ptaj++)/ppdof; /* integer division */
3055         /* P has same number of nonzeros per row as the compressed form */
3056         pnzj = pi[prow+1] - pi[prow];
3057         pjj  = pj + pi[prow];
3058         for (k=0;k<pnzj;k++) {
3059           /* Locations in C are shifted by the offset within the block */
3060           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3061           if (!denserow[pjj[k]*ppdof+pshift]) {
3062             denserow[pjj[k]*ppdof+pshift] = -1;
3063             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3064           }
3065         }
3066       }
3067 
3068       /* sort sparserow */
3069       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3070 
3071       /* If free space is not available, make more free space */
3072       /* Double the amount of total space in the list */
3073       if (current_space->local_remaining<cnzi) {
3074         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
3075       }
3076 
3077       /* Copy data into free space, and zero out denserows */
3078       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
3079 
3080       current_space->array           += cnzi;
3081       current_space->local_used      += cnzi;
3082       current_space->local_remaining -= cnzi;
3083 
3084       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3085       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3086 
3087       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3088       /*        For now, we will recompute what is needed. */
3089       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3090     }
3091   }
3092   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3093   /* Allocate space for cj, initialize cj, and */
3094   /* destroy list of free space and other temporary array(s) */
3095   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3096   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3097   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3098 
3099   /* Allocate space for ca */
3100   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
3101 
3102   /* put together the new matrix */
3103   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3104   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
3105 
3106   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3107   /* Since these are PETSc arrays, change flags to free them as necessary. */
3108   c          = (Mat_SeqAIJ*)((*C)->data);
3109   c->free_a  = PETSC_TRUE;
3110   c->free_ij = PETSC_TRUE;
3111   c->nonew   = 0;
3112 
3113   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3114 
3115   /* Clean up. */
3116   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 #undef __FUNCT__
3121 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
3122 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3123 {
3124   /* This routine requires testing -- first draft only */
3125   PetscErrorCode  ierr;
3126   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3127   Mat             P  =pp->AIJ;
3128   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3129   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3130   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3131   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3132   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3133   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3134   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3135   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3136   MatScalar       *ca=c->a,*caj,*apa;
3137 
3138   PetscFunctionBegin;
3139   /* Allocate temporary array for storage of one row of A*P */
3140   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3141 
3142   /* Clear old values in C */
3143   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3144 
3145   for (i=0; i<am; i++) {
3146     /* Form sparse row of A*P */
3147     anzi  = ai[i+1] - ai[i];
3148     apnzj = 0;
3149     for (j=0; j<anzi; j++) {
3150       /* Get offset within block of P */
3151       pshift = *aj%ppdof;
3152       /* Get block row of P */
3153       prow = *aj++/ppdof;   /* integer division */
3154       pnzj = pi[prow+1] - pi[prow];
3155       pjj  = pj + pi[prow];
3156       paj  = pa + pi[prow];
3157       for (k=0; k<pnzj; k++) {
3158         poffset = pjj[k]*ppdof+pshift;
3159         if (!apjdense[poffset]) {
3160           apjdense[poffset] = -1;
3161           apj[apnzj++]      = poffset;
3162         }
3163         apa[poffset] += (*aa)*paj[k];
3164       }
3165       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3166       aa++;
3167     }
3168 
3169     /* Sort the j index array for quick sparse axpy. */
3170     /* Note: a array does not need sorting as it is in dense storage locations. */
3171     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3172 
3173     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3174     prow    = i/ppdof; /* integer division */
3175     pshift  = i%ppdof;
3176     poffset = pi[prow];
3177     pnzi    = pi[prow+1] - poffset;
3178     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3179     pJ = pj+poffset;
3180     pA = pa+poffset;
3181     for (j=0; j<pnzi; j++) {
3182       crow = (*pJ)*ppdof+pshift;
3183       cjj  = cj + ci[crow];
3184       caj  = ca + ci[crow];
3185       pJ++;
3186       /* Perform sparse axpy operation.  Note cjj includes apj. */
3187       for (k=0,nextap=0; nextap<apnzj; k++) {
3188         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3189       }
3190       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3191       pA++;
3192     }
3193 
3194     /* Zero the current row info for A*P */
3195     for (j=0; j<apnzj; j++) {
3196       apa[apj[j]]      = 0.;
3197       apjdense[apj[j]] = 0;
3198     }
3199   }
3200 
3201   /* Assemble the final matrix and clean up */
3202   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3203   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3204   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ"
3210 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3211 {
3212   PetscErrorCode ierr;
3213 
3214   PetscFunctionBegin;
3215   if (scall == MAT_INITIAL_MATRIX) {
3216     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3217     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3218     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3219   }
3220   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3221   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3222   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3228 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3229 {
3230   PetscErrorCode ierr;
3231 
3232   PetscFunctionBegin;
3233   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3234   ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr);
3235   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 #undef __FUNCT__
3240 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3241 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3242 {
3243   PetscFunctionBegin;
3244   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ"
3250 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3251 {
3252   PetscErrorCode ierr;
3253 
3254   PetscFunctionBegin;
3255   if (scall == MAT_INITIAL_MATRIX) {
3256     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3257     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3258     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3259   }
3260   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3261   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3262   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 #undef __FUNCT__
3267 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3268 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3269 {
3270   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3271   Mat            a    = b->AIJ,B;
3272   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3273   PetscErrorCode ierr;
3274   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3275   PetscInt       *cols;
3276   PetscScalar    *vals;
3277 
3278   PetscFunctionBegin;
3279   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3280   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3281   for (i=0; i<m; i++) {
3282     nmax = PetscMax(nmax,aij->ilen[i]);
3283     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3284   }
3285   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3286   ierr = PetscFree(ilen);CHKERRQ(ierr);
3287   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3288   ii   = 0;
3289   for (i=0; i<m; i++) {
3290     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3291     for (j=0; j<dof; j++) {
3292       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3293       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3294       ii++;
3295     }
3296     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3297   }
3298   ierr = PetscFree(icols);CHKERRQ(ierr);
3299   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3300   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301 
3302   if (reuse == MAT_INPLACE_MATRIX) {
3303     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3304   } else {
3305     *newmat = B;
3306   }
3307   PetscFunctionReturn(0);
3308 }
3309 
3310 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3311 
3312 #undef __FUNCT__
3313 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3314 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3315 {
3316   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3317   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3318   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3319   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3320   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3321   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3322   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3323   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3324   PetscInt       rstart,cstart,*garray,ii,k;
3325   PetscErrorCode ierr;
3326   PetscScalar    *vals,*ovals;
3327 
3328   PetscFunctionBegin;
3329   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3330   for (i=0; i<A->rmap->n/dof; i++) {
3331     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3332     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3333     for (j=0; j<dof; j++) {
3334       dnz[dof*i+j] = AIJ->ilen[i];
3335       onz[dof*i+j] = OAIJ->ilen[i];
3336     }
3337   }
3338   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3339   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3340   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3341 
3342   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3343   rstart = dof*maij->A->rmap->rstart;
3344   cstart = dof*maij->A->cmap->rstart;
3345   garray = mpiaij->garray;
3346 
3347   ii = rstart;
3348   for (i=0; i<A->rmap->n/dof; i++) {
3349     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3350     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3351     for (j=0; j<dof; j++) {
3352       for (k=0; k<ncols; k++) {
3353         icols[k] = cstart + dof*cols[k]+j;
3354       }
3355       for (k=0; k<oncols; k++) {
3356         oicols[k] = dof*garray[ocols[k]]+j;
3357       }
3358       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3359       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3360       ii++;
3361     }
3362     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3363     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3364   }
3365   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3366 
3367   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3368   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3369 
3370   if (reuse == MAT_INPLACE_MATRIX) {
3371     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3372     ((PetscObject)A)->refct = 1;
3373 
3374     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3375 
3376     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3377   } else {
3378     *newmat = B;
3379   }
3380   PetscFunctionReturn(0);
3381 }
3382 
3383 #undef __FUNCT__
3384 #define __FUNCT__ "MatGetSubMatrix_MAIJ"
3385 PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3386 {
3387   PetscErrorCode ierr;
3388   Mat            A;
3389 
3390   PetscFunctionBegin;
3391   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3392   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3393   ierr = MatDestroy(&A);CHKERRQ(ierr);
3394   PetscFunctionReturn(0);
3395 }
3396 
3397 /* ---------------------------------------------------------------------------------- */
3398 #undef __FUNCT__
3399 #define __FUNCT__ "MatCreateMAIJ"
3400 /*@C
3401   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3402   operations for multicomponent problems.  It interpolates each component the same
3403   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3404   and MATMPIAIJ for distributed matrices.
3405 
3406   Collective
3407 
3408   Input Parameters:
3409 + A - the AIJ matrix describing the action on blocks
3410 - dof - the block size (number of components per node)
3411 
3412   Output Parameter:
3413 . maij - the new MAIJ matrix
3414 
3415   Operations provided:
3416 + MatMult
3417 . MatMultTranspose
3418 . MatMultAdd
3419 . MatMultTransposeAdd
3420 - MatView
3421 
3422   Level: advanced
3423 
3424 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3425 @*/
3426 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3427 {
3428   PetscErrorCode ierr;
3429   PetscMPIInt    size;
3430   PetscInt       n;
3431   Mat            B;
3432 
3433   PetscFunctionBegin;
3434   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3435 
3436   if (dof == 1) *maij = A;
3437   else {
3438     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3439     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3440     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3441     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3442     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3443     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3444 
3445     B->assembled = PETSC_TRUE;
3446 
3447     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3448     if (size == 1) {
3449       Mat_SeqMAIJ *b;
3450 
3451       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3452 
3453       B->ops->setup   = NULL;
3454       B->ops->destroy = MatDestroy_SeqMAIJ;
3455       B->ops->view    = MatView_SeqMAIJ;
3456       b               = (Mat_SeqMAIJ*)B->data;
3457       b->dof          = dof;
3458       b->AIJ          = A;
3459 
3460       if (dof == 2) {
3461         B->ops->mult             = MatMult_SeqMAIJ_2;
3462         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3463         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3464         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3465       } else if (dof == 3) {
3466         B->ops->mult             = MatMult_SeqMAIJ_3;
3467         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3468         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3469         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3470       } else if (dof == 4) {
3471         B->ops->mult             = MatMult_SeqMAIJ_4;
3472         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3473         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3474         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3475       } else if (dof == 5) {
3476         B->ops->mult             = MatMult_SeqMAIJ_5;
3477         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3478         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3479         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3480       } else if (dof == 6) {
3481         B->ops->mult             = MatMult_SeqMAIJ_6;
3482         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3483         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3484         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3485       } else if (dof == 7) {
3486         B->ops->mult             = MatMult_SeqMAIJ_7;
3487         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3488         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3489         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3490       } else if (dof == 8) {
3491         B->ops->mult             = MatMult_SeqMAIJ_8;
3492         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3493         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3494         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3495       } else if (dof == 9) {
3496         B->ops->mult             = MatMult_SeqMAIJ_9;
3497         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3498         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3499         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3500       } else if (dof == 10) {
3501         B->ops->mult             = MatMult_SeqMAIJ_10;
3502         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3503         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3504         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3505       } else if (dof == 11) {
3506         B->ops->mult             = MatMult_SeqMAIJ_11;
3507         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3508         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3509         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3510       } else if (dof == 16) {
3511         B->ops->mult             = MatMult_SeqMAIJ_16;
3512         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3513         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3514         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3515       } else if (dof == 18) {
3516         B->ops->mult             = MatMult_SeqMAIJ_18;
3517         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3518         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3519         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3520       } else {
3521         B->ops->mult             = MatMult_SeqMAIJ_N;
3522         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3523         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3524         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3525       }
3526       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3527       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3528     } else {
3529       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3530       Mat_MPIMAIJ *b;
3531       IS          from,to;
3532       Vec         gvec;
3533 
3534       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3535 
3536       B->ops->setup   = NULL;
3537       B->ops->destroy = MatDestroy_MPIMAIJ;
3538       B->ops->view    = MatView_MPIMAIJ;
3539 
3540       b      = (Mat_MPIMAIJ*)B->data;
3541       b->dof = dof;
3542       b->A   = A;
3543 
3544       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3545       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3546 
3547       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3548       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3549       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3550       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3551       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3552 
3553       /* create two temporary Index sets for build scatter gather */
3554       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3555       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3556 
3557       /* create temporary global vector to generate scatter context */
3558       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3559 
3560       /* generate the scatter context */
3561       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3562 
3563       ierr = ISDestroy(&from);CHKERRQ(ierr);
3564       ierr = ISDestroy(&to);CHKERRQ(ierr);
3565       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3566 
3567       B->ops->mult             = MatMult_MPIMAIJ_dof;
3568       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3569       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3570       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3571 
3572       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3573       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3574     }
3575     B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3576     ierr  = MatSetUp(B);CHKERRQ(ierr);
3577     *maij = B;
3578     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3579   }
3580   PetscFunctionReturn(0);
3581 }
3582