xref: /petsc/src/mat/impls/maij/maij.c (revision c3d102fe2dfc19c1832704c537829f04ceb136a2)
1 /*
2     Defines the basic matrix operations for the MAIJ  matrix storage format.
3   This format is used for restriction and interpolation operations for
4   multicomponent problems. It interpolates each component the same way
5   independently.
6 
7      We provide:
8          MatMult()
9          MatMultTranspose()
10          MatMultTransposeAdd()
11          MatMultAdd()
12           and
13          MatCreateMAIJ(Mat,dof,Mat*)
14 
15      This single directory handles both the sequential and parallel codes
16 */
17 
18 #include "src/mat/impls/maij/maij.h"
19 #include "src/mat/utils/freespace.h"
20 #include "vecimpl.h"
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatMAIJGetAIJ"
24 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
25 {
26   PetscErrorCode ierr;
27   PetscTruth     ismpimaij,isseqmaij;
28 
29   PetscFunctionBegin;
30   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32   if (ismpimaij) {
33     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34 
35     *B = b->A;
36   } else if (isseqmaij) {
37     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38 
39     *B = b->AIJ;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatMAIJRedimension"
48 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
49 {
50   PetscErrorCode ierr;
51   Mat            Aij;
52 
53   PetscFunctionBegin;
54   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SeqMAIJ"
61 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
62 {
63   PetscErrorCode ierr;
64   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
65 
66   PetscFunctionBegin;
67   if (b->AIJ) {
68     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
69   }
70   ierr = PetscFree(b);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatView_SeqMAIJ"
76 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
77 {
78   PetscErrorCode ierr;
79   Mat            B;
80 
81   PetscFunctionBegin;
82   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
83   ierr = MatView(B,viewer);CHKERRQ(ierr);
84   ierr = MatDestroy(B);CHKERRQ(ierr);
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatView_MPIMAIJ"
90 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
91 {
92   PetscErrorCode ierr;
93   Mat            B;
94 
95   PetscFunctionBegin;
96   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
97   ierr = MatView(B,viewer);CHKERRQ(ierr);
98   ierr = MatDestroy(B);CHKERRQ(ierr);
99   PetscFunctionReturn(0);
100 }
101 
102 #undef __FUNCT__
103 #define __FUNCT__ "MatDestroy_MPIMAIJ"
104 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
105 {
106   PetscErrorCode ierr;
107   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
108 
109   PetscFunctionBegin;
110   if (b->AIJ) {
111     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
112   }
113   if (b->OAIJ) {
114     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
115   }
116   if (b->A) {
117     ierr = MatDestroy(b->A);CHKERRQ(ierr);
118   }
119   if (b->ctx) {
120     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
121   }
122   if (b->w) {
123     ierr = VecDestroy(b->w);CHKERRQ(ierr);
124   }
125   ierr = PetscFree(b);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 /*MC
130   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
131   multicomponent problems, interpolating or restricting each component the same way independently.
132   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
133 
134   Operations provided:
135 . MatMult
136 . MatMultTranspose
137 . MatMultAdd
138 . MatMultTransposeAdd
139 
140   Level: advanced
141 
142 .seealso: MatCreateSeqDense
143 M*/
144 
145 EXTERN_C_BEGIN
146 #undef __FUNCT__
147 #define __FUNCT__ "MatCreate_MAIJ"
148 PetscErrorCode MatCreate_MAIJ(Mat A)
149 {
150   PetscErrorCode ierr;
151   Mat_MPIMAIJ    *b;
152 
153   PetscFunctionBegin;
154   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
155   A->data  = (void*)b;
156   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
157   A->factor           = 0;
158   A->mapping          = 0;
159 
160   b->AIJ  = 0;
161   b->dof  = 0;
162   b->OAIJ = 0;
163   b->ctx  = 0;
164   b->w    = 0;
165   PetscFunctionReturn(0);
166 }
167 EXTERN_C_END
168 
169 /* --------------------------------------------------------------------------------------*/
170 #undef __FUNCT__
171 #define __FUNCT__ "MatMult_SeqMAIJ_2"
172 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
173 {
174   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
175   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
176   PetscScalar    *x,*y,*v,sum1, sum2;
177   PetscErrorCode ierr;
178   PetscInt       m = b->AIJ->m,*idx,*ii;
179   PetscInt       n,i,jrow,j;
180 
181   PetscFunctionBegin;
182   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
183   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
184   idx  = a->j;
185   v    = a->a;
186   ii   = a->i;
187 
188   for (i=0; i<m; i++) {
189     jrow = ii[i];
190     n    = ii[i+1] - jrow;
191     sum1  = 0.0;
192     sum2  = 0.0;
193     for (j=0; j<n; j++) {
194       sum1 += v[jrow]*x[2*idx[jrow]];
195       sum2 += v[jrow]*x[2*idx[jrow]+1];
196       jrow++;
197      }
198     y[2*i]   = sum1;
199     y[2*i+1] = sum2;
200   }
201 
202   PetscLogFlops(4*a->nz - 2*m);
203   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
204   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
210 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
211 {
212   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
213   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
214   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
215   PetscErrorCode ierr;
216   PetscInt       m = b->AIJ->m,n,i,*idx;
217 
218   PetscFunctionBegin;
219   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
220   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
221   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
222 
223   for (i=0; i<m; i++) {
224     idx    = a->j + a->i[i] ;
225     v      = a->a + a->i[i] ;
226     n      = a->i[i+1] - a->i[i];
227     alpha1 = x[2*i];
228     alpha2 = x[2*i+1];
229     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
230   }
231   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
232   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
233   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
239 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
240 {
241   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
242   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
243   PetscScalar    *x,*y,*v,sum1, sum2;
244   PetscErrorCode ierr;
245   PetscInt       m = b->AIJ->m,*idx,*ii;
246   PetscInt       n,i,jrow,j;
247 
248   PetscFunctionBegin;
249   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
250   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
251   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
252   idx  = a->j;
253   v    = a->a;
254   ii   = a->i;
255 
256   for (i=0; i<m; i++) {
257     jrow = ii[i];
258     n    = ii[i+1] - jrow;
259     sum1  = 0.0;
260     sum2  = 0.0;
261     for (j=0; j<n; j++) {
262       sum1 += v[jrow]*x[2*idx[jrow]];
263       sum2 += v[jrow]*x[2*idx[jrow]+1];
264       jrow++;
265      }
266     y[2*i]   += sum1;
267     y[2*i+1] += sum2;
268   }
269 
270   PetscLogFlops(4*a->nz - 2*m);
271   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
272   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 #undef __FUNCT__
276 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
277 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
278 {
279   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
281   PetscScalar    *x,*y,*v,alpha1,alpha2;
282   PetscErrorCode ierr;
283   PetscInt       m = b->AIJ->m,n,i,*idx;
284 
285   PetscFunctionBegin;
286   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
287   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
288   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
289 
290   for (i=0; i<m; i++) {
291     idx   = a->j + a->i[i] ;
292     v     = a->a + a->i[i] ;
293     n     = a->i[i+1] - a->i[i];
294     alpha1 = x[2*i];
295     alpha2 = x[2*i+1];
296     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
297   }
298   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
299   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
300   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 /* --------------------------------------------------------------------------------------*/
304 #undef __FUNCT__
305 #define __FUNCT__ "MatMult_SeqMAIJ_3"
306 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
307 {
308   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
309   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
310   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
311   PetscErrorCode ierr;
312   PetscInt       m = b->AIJ->m,*idx,*ii;
313   PetscInt       n,i,jrow,j;
314 
315   PetscFunctionBegin;
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
318   idx  = a->j;
319   v    = a->a;
320   ii   = a->i;
321 
322   for (i=0; i<m; i++) {
323     jrow = ii[i];
324     n    = ii[i+1] - jrow;
325     sum1  = 0.0;
326     sum2  = 0.0;
327     sum3  = 0.0;
328     for (j=0; j<n; j++) {
329       sum1 += v[jrow]*x[3*idx[jrow]];
330       sum2 += v[jrow]*x[3*idx[jrow]+1];
331       sum3 += v[jrow]*x[3*idx[jrow]+2];
332       jrow++;
333      }
334     y[3*i]   = sum1;
335     y[3*i+1] = sum2;
336     y[3*i+2] = sum3;
337   }
338 
339   PetscLogFlops(6*a->nz - 3*m);
340   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
341   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 #undef __FUNCT__
346 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
347 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
348 {
349   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
350   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
351   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
352   PetscErrorCode ierr;
353   PetscInt       m = b->AIJ->m,n,i,*idx;
354 
355   PetscFunctionBegin;
356   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
357   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
358   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
359 
360   for (i=0; i<m; i++) {
361     idx    = a->j + a->i[i];
362     v      = a->a + a->i[i];
363     n      = a->i[i+1] - a->i[i];
364     alpha1 = x[3*i];
365     alpha2 = x[3*i+1];
366     alpha3 = x[3*i+2];
367     while (n-->0) {
368       y[3*(*idx)]   += alpha1*(*v);
369       y[3*(*idx)+1] += alpha2*(*v);
370       y[3*(*idx)+2] += alpha3*(*v);
371       idx++; v++;
372     }
373   }
374   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
375   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
376   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
377   PetscFunctionReturn(0);
378 }
379 
380 #undef __FUNCT__
381 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
382 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
383 {
384   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
385   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
386   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
387   PetscErrorCode ierr;
388   PetscInt       m = b->AIJ->m,*idx,*ii;
389   PetscInt       n,i,jrow,j;
390 
391   PetscFunctionBegin;
392   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
393   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
394   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
395   idx  = a->j;
396   v    = a->a;
397   ii   = a->i;
398 
399   for (i=0; i<m; i++) {
400     jrow = ii[i];
401     n    = ii[i+1] - jrow;
402     sum1  = 0.0;
403     sum2  = 0.0;
404     sum3  = 0.0;
405     for (j=0; j<n; j++) {
406       sum1 += v[jrow]*x[3*idx[jrow]];
407       sum2 += v[jrow]*x[3*idx[jrow]+1];
408       sum3 += v[jrow]*x[3*idx[jrow]+2];
409       jrow++;
410      }
411     y[3*i]   += sum1;
412     y[3*i+1] += sum2;
413     y[3*i+2] += sum3;
414   }
415 
416   PetscLogFlops(6*a->nz);
417   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
418   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 #undef __FUNCT__
422 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
423 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
424 {
425   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
426   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
427   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
428   PetscErrorCode ierr;
429   PetscInt       m = b->AIJ->m,n,i,*idx;
430 
431   PetscFunctionBegin;
432   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
433   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
434   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
435   for (i=0; i<m; i++) {
436     idx    = a->j + a->i[i] ;
437     v      = a->a + a->i[i] ;
438     n      = a->i[i+1] - a->i[i];
439     alpha1 = x[3*i];
440     alpha2 = x[3*i+1];
441     alpha3 = x[3*i+2];
442     while (n-->0) {
443       y[3*(*idx)]   += alpha1*(*v);
444       y[3*(*idx)+1] += alpha2*(*v);
445       y[3*(*idx)+2] += alpha3*(*v);
446       idx++; v++;
447     }
448   }
449   PetscLogFlops(6*a->nz);
450   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
451   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 /* ------------------------------------------------------------------------------*/
456 #undef __FUNCT__
457 #define __FUNCT__ "MatMult_SeqMAIJ_4"
458 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
459 {
460   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
461   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
462   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
463   PetscErrorCode ierr;
464   PetscInt       m = b->AIJ->m,*idx,*ii;
465   PetscInt       n,i,jrow,j;
466 
467   PetscFunctionBegin;
468   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
469   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
470   idx  = a->j;
471   v    = a->a;
472   ii   = a->i;
473 
474   for (i=0; i<m; i++) {
475     jrow = ii[i];
476     n    = ii[i+1] - jrow;
477     sum1  = 0.0;
478     sum2  = 0.0;
479     sum3  = 0.0;
480     sum4  = 0.0;
481     for (j=0; j<n; j++) {
482       sum1 += v[jrow]*x[4*idx[jrow]];
483       sum2 += v[jrow]*x[4*idx[jrow]+1];
484       sum3 += v[jrow]*x[4*idx[jrow]+2];
485       sum4 += v[jrow]*x[4*idx[jrow]+3];
486       jrow++;
487      }
488     y[4*i]   = sum1;
489     y[4*i+1] = sum2;
490     y[4*i+2] = sum3;
491     y[4*i+3] = sum4;
492   }
493 
494   PetscLogFlops(8*a->nz - 4*m);
495   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
496   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
502 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
503 {
504   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
505   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
506   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
507   PetscErrorCode ierr;
508   PetscInt       m = b->AIJ->m,n,i,*idx;
509 
510   PetscFunctionBegin;
511   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
512   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
513   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
514   for (i=0; i<m; i++) {
515     idx    = a->j + a->i[i] ;
516     v      = a->a + a->i[i] ;
517     n      = a->i[i+1] - a->i[i];
518     alpha1 = x[4*i];
519     alpha2 = x[4*i+1];
520     alpha3 = x[4*i+2];
521     alpha4 = x[4*i+3];
522     while (n-->0) {
523       y[4*(*idx)]   += alpha1*(*v);
524       y[4*(*idx)+1] += alpha2*(*v);
525       y[4*(*idx)+2] += alpha3*(*v);
526       y[4*(*idx)+3] += alpha4*(*v);
527       idx++; v++;
528     }
529   }
530   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
531   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
532   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 #undef __FUNCT__
537 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
538 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
539 {
540   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
541   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
542   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
543   PetscErrorCode ierr;
544   PetscInt       m = b->AIJ->m,*idx,*ii;
545   PetscInt       n,i,jrow,j;
546 
547   PetscFunctionBegin;
548   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
549   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
550   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
551   idx  = a->j;
552   v    = a->a;
553   ii   = a->i;
554 
555   for (i=0; i<m; i++) {
556     jrow = ii[i];
557     n    = ii[i+1] - jrow;
558     sum1  = 0.0;
559     sum2  = 0.0;
560     sum3  = 0.0;
561     sum4  = 0.0;
562     for (j=0; j<n; j++) {
563       sum1 += v[jrow]*x[4*idx[jrow]];
564       sum2 += v[jrow]*x[4*idx[jrow]+1];
565       sum3 += v[jrow]*x[4*idx[jrow]+2];
566       sum4 += v[jrow]*x[4*idx[jrow]+3];
567       jrow++;
568      }
569     y[4*i]   += sum1;
570     y[4*i+1] += sum2;
571     y[4*i+2] += sum3;
572     y[4*i+3] += sum4;
573   }
574 
575   PetscLogFlops(8*a->nz - 4*m);
576   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 #undef __FUNCT__
581 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
582 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
583 {
584   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
585   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
586   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
587   PetscErrorCode ierr;
588   PetscInt       m = b->AIJ->m,n,i,*idx;
589 
590   PetscFunctionBegin;
591   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
592   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
593   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
594 
595   for (i=0; i<m; i++) {
596     idx    = a->j + a->i[i] ;
597     v      = a->a + a->i[i] ;
598     n      = a->i[i+1] - a->i[i];
599     alpha1 = x[4*i];
600     alpha2 = x[4*i+1];
601     alpha3 = x[4*i+2];
602     alpha4 = x[4*i+3];
603     while (n-->0) {
604       y[4*(*idx)]   += alpha1*(*v);
605       y[4*(*idx)+1] += alpha2*(*v);
606       y[4*(*idx)+2] += alpha3*(*v);
607       y[4*(*idx)+3] += alpha4*(*v);
608       idx++; v++;
609     }
610   }
611   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
612   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
613   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 /* ------------------------------------------------------------------------------*/
617 
618 #undef __FUNCT__
619 #define __FUNCT__ "MatMult_SeqMAIJ_5"
620 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
621 {
622   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
623   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
624   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
625   PetscErrorCode ierr;
626   PetscInt       m = b->AIJ->m,*idx,*ii;
627   PetscInt       n,i,jrow,j;
628 
629   PetscFunctionBegin;
630   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
631   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
632   idx  = a->j;
633   v    = a->a;
634   ii   = a->i;
635 
636   for (i=0; i<m; i++) {
637     jrow = ii[i];
638     n    = ii[i+1] - jrow;
639     sum1  = 0.0;
640     sum2  = 0.0;
641     sum3  = 0.0;
642     sum4  = 0.0;
643     sum5  = 0.0;
644     for (j=0; j<n; j++) {
645       sum1 += v[jrow]*x[5*idx[jrow]];
646       sum2 += v[jrow]*x[5*idx[jrow]+1];
647       sum3 += v[jrow]*x[5*idx[jrow]+2];
648       sum4 += v[jrow]*x[5*idx[jrow]+3];
649       sum5 += v[jrow]*x[5*idx[jrow]+4];
650       jrow++;
651      }
652     y[5*i]   = sum1;
653     y[5*i+1] = sum2;
654     y[5*i+2] = sum3;
655     y[5*i+3] = sum4;
656     y[5*i+4] = sum5;
657   }
658 
659   PetscLogFlops(10*a->nz - 5*m);
660   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
661   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNCT__
666 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
667 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
668 {
669   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
670   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
671   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
672   PetscErrorCode ierr;
673   PetscInt       m = b->AIJ->m,n,i,*idx;
674 
675   PetscFunctionBegin;
676   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
677   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
678   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
679 
680   for (i=0; i<m; i++) {
681     idx    = a->j + a->i[i] ;
682     v      = a->a + a->i[i] ;
683     n      = a->i[i+1] - a->i[i];
684     alpha1 = x[5*i];
685     alpha2 = x[5*i+1];
686     alpha3 = x[5*i+2];
687     alpha4 = x[5*i+3];
688     alpha5 = x[5*i+4];
689     while (n-->0) {
690       y[5*(*idx)]   += alpha1*(*v);
691       y[5*(*idx)+1] += alpha2*(*v);
692       y[5*(*idx)+2] += alpha3*(*v);
693       y[5*(*idx)+3] += alpha4*(*v);
694       y[5*(*idx)+4] += alpha5*(*v);
695       idx++; v++;
696     }
697   }
698   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
699   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
700   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
706 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
707 {
708   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
709   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
710   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
711   PetscErrorCode ierr;
712   PetscInt       m = b->AIJ->m,*idx,*ii;
713   PetscInt       n,i,jrow,j;
714 
715   PetscFunctionBegin;
716   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
717   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
718   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
719   idx  = a->j;
720   v    = a->a;
721   ii   = a->i;
722 
723   for (i=0; i<m; i++) {
724     jrow = ii[i];
725     n    = ii[i+1] - jrow;
726     sum1  = 0.0;
727     sum2  = 0.0;
728     sum3  = 0.0;
729     sum4  = 0.0;
730     sum5  = 0.0;
731     for (j=0; j<n; j++) {
732       sum1 += v[jrow]*x[5*idx[jrow]];
733       sum2 += v[jrow]*x[5*idx[jrow]+1];
734       sum3 += v[jrow]*x[5*idx[jrow]+2];
735       sum4 += v[jrow]*x[5*idx[jrow]+3];
736       sum5 += v[jrow]*x[5*idx[jrow]+4];
737       jrow++;
738      }
739     y[5*i]   += sum1;
740     y[5*i+1] += sum2;
741     y[5*i+2] += sum3;
742     y[5*i+3] += sum4;
743     y[5*i+4] += sum5;
744   }
745 
746   PetscLogFlops(10*a->nz);
747   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
748   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNCT__
753 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
754 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
755 {
756   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
757   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
758   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
759   PetscErrorCode ierr;
760   PetscInt       m = b->AIJ->m,n,i,*idx;
761 
762   PetscFunctionBegin;
763   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
764   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
765   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
766 
767   for (i=0; i<m; i++) {
768     idx    = a->j + a->i[i] ;
769     v      = a->a + a->i[i] ;
770     n      = a->i[i+1] - a->i[i];
771     alpha1 = x[5*i];
772     alpha2 = x[5*i+1];
773     alpha3 = x[5*i+2];
774     alpha4 = x[5*i+3];
775     alpha5 = x[5*i+4];
776     while (n-->0) {
777       y[5*(*idx)]   += alpha1*(*v);
778       y[5*(*idx)+1] += alpha2*(*v);
779       y[5*(*idx)+2] += alpha3*(*v);
780       y[5*(*idx)+3] += alpha4*(*v);
781       y[5*(*idx)+4] += alpha5*(*v);
782       idx++; v++;
783     }
784   }
785   PetscLogFlops(10*a->nz);
786   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
787   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
788   PetscFunctionReturn(0);
789 }
790 
791 /* ------------------------------------------------------------------------------*/
792 #undef __FUNCT__
793 #define __FUNCT__ "MatMult_SeqMAIJ_6"
794 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
795 {
796   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
797   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
798   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
799   PetscErrorCode ierr;
800   PetscInt       m = b->AIJ->m,*idx,*ii;
801   PetscInt       n,i,jrow,j;
802 
803   PetscFunctionBegin;
804   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
805   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
806   idx  = a->j;
807   v    = a->a;
808   ii   = a->i;
809 
810   for (i=0; i<m; i++) {
811     jrow = ii[i];
812     n    = ii[i+1] - jrow;
813     sum1  = 0.0;
814     sum2  = 0.0;
815     sum3  = 0.0;
816     sum4  = 0.0;
817     sum5  = 0.0;
818     sum6  = 0.0;
819     for (j=0; j<n; j++) {
820       sum1 += v[jrow]*x[6*idx[jrow]];
821       sum2 += v[jrow]*x[6*idx[jrow]+1];
822       sum3 += v[jrow]*x[6*idx[jrow]+2];
823       sum4 += v[jrow]*x[6*idx[jrow]+3];
824       sum5 += v[jrow]*x[6*idx[jrow]+4];
825       sum6 += v[jrow]*x[6*idx[jrow]+5];
826       jrow++;
827      }
828     y[6*i]   = sum1;
829     y[6*i+1] = sum2;
830     y[6*i+2] = sum3;
831     y[6*i+3] = sum4;
832     y[6*i+4] = sum5;
833     y[6*i+5] = sum6;
834   }
835 
836   PetscLogFlops(12*a->nz - 6*m);
837   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
838   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
844 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
845 {
846   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
847   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
848   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
849   PetscErrorCode ierr;
850   PetscInt       m = b->AIJ->m,n,i,*idx;
851 
852   PetscFunctionBegin;
853   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
856 
857   for (i=0; i<m; i++) {
858     idx    = a->j + a->i[i] ;
859     v      = a->a + a->i[i] ;
860     n      = a->i[i+1] - a->i[i];
861     alpha1 = x[6*i];
862     alpha2 = x[6*i+1];
863     alpha3 = x[6*i+2];
864     alpha4 = x[6*i+3];
865     alpha5 = x[6*i+4];
866     alpha6 = x[6*i+5];
867     while (n-->0) {
868       y[6*(*idx)]   += alpha1*(*v);
869       y[6*(*idx)+1] += alpha2*(*v);
870       y[6*(*idx)+2] += alpha3*(*v);
871       y[6*(*idx)+3] += alpha4*(*v);
872       y[6*(*idx)+4] += alpha5*(*v);
873       y[6*(*idx)+5] += alpha6*(*v);
874       idx++; v++;
875     }
876   }
877   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 #undef __FUNCT__
884 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
885 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
886 {
887   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
888   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
889   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
890   PetscErrorCode ierr;
891   PetscInt       m = b->AIJ->m,*idx,*ii;
892   PetscInt       n,i,jrow,j;
893 
894   PetscFunctionBegin;
895   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
896   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
897   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
898   idx  = a->j;
899   v    = a->a;
900   ii   = a->i;
901 
902   for (i=0; i<m; i++) {
903     jrow = ii[i];
904     n    = ii[i+1] - jrow;
905     sum1  = 0.0;
906     sum2  = 0.0;
907     sum3  = 0.0;
908     sum4  = 0.0;
909     sum5  = 0.0;
910     sum6  = 0.0;
911     for (j=0; j<n; j++) {
912       sum1 += v[jrow]*x[6*idx[jrow]];
913       sum2 += v[jrow]*x[6*idx[jrow]+1];
914       sum3 += v[jrow]*x[6*idx[jrow]+2];
915       sum4 += v[jrow]*x[6*idx[jrow]+3];
916       sum5 += v[jrow]*x[6*idx[jrow]+4];
917       sum6 += v[jrow]*x[6*idx[jrow]+5];
918       jrow++;
919      }
920     y[6*i]   += sum1;
921     y[6*i+1] += sum2;
922     y[6*i+2] += sum3;
923     y[6*i+3] += sum4;
924     y[6*i+4] += sum5;
925     y[6*i+5] += sum6;
926   }
927 
928   PetscLogFlops(12*a->nz);
929   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
930   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
936 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
939   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
940   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
941   PetscErrorCode ierr;
942   PetscInt       m = b->AIJ->m,n,i,*idx;
943 
944   PetscFunctionBegin;
945   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
946   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
947   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
948 
949   for (i=0; i<m; i++) {
950     idx    = a->j + a->i[i] ;
951     v      = a->a + a->i[i] ;
952     n      = a->i[i+1] - a->i[i];
953     alpha1 = x[6*i];
954     alpha2 = x[6*i+1];
955     alpha3 = x[6*i+2];
956     alpha4 = x[6*i+3];
957     alpha5 = x[6*i+4];
958     alpha6 = x[6*i+5];
959     while (n-->0) {
960       y[6*(*idx)]   += alpha1*(*v);
961       y[6*(*idx)+1] += alpha2*(*v);
962       y[6*(*idx)+2] += alpha3*(*v);
963       y[6*(*idx)+3] += alpha4*(*v);
964       y[6*(*idx)+4] += alpha5*(*v);
965       y[6*(*idx)+5] += alpha6*(*v);
966       idx++; v++;
967     }
968   }
969   PetscLogFlops(12*a->nz);
970   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 /* ------------------------------------------------------------------------------*/
976 #undef __FUNCT__
977 #define __FUNCT__ "MatMult_SeqMAIJ_8"
978 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
979 {
980   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
981   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
982   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
983   PetscErrorCode ierr;
984   PetscInt       m = b->AIJ->m,*idx,*ii;
985   PetscInt       n,i,jrow,j;
986 
987   PetscFunctionBegin;
988   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
989   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
990   idx  = a->j;
991   v    = a->a;
992   ii   = a->i;
993 
994   for (i=0; i<m; i++) {
995     jrow = ii[i];
996     n    = ii[i+1] - jrow;
997     sum1  = 0.0;
998     sum2  = 0.0;
999     sum3  = 0.0;
1000     sum4  = 0.0;
1001     sum5  = 0.0;
1002     sum6  = 0.0;
1003     sum7  = 0.0;
1004     sum8  = 0.0;
1005     for (j=0; j<n; j++) {
1006       sum1 += v[jrow]*x[8*idx[jrow]];
1007       sum2 += v[jrow]*x[8*idx[jrow]+1];
1008       sum3 += v[jrow]*x[8*idx[jrow]+2];
1009       sum4 += v[jrow]*x[8*idx[jrow]+3];
1010       sum5 += v[jrow]*x[8*idx[jrow]+4];
1011       sum6 += v[jrow]*x[8*idx[jrow]+5];
1012       sum7 += v[jrow]*x[8*idx[jrow]+6];
1013       sum8 += v[jrow]*x[8*idx[jrow]+7];
1014       jrow++;
1015      }
1016     y[8*i]   = sum1;
1017     y[8*i+1] = sum2;
1018     y[8*i+2] = sum3;
1019     y[8*i+3] = sum4;
1020     y[8*i+4] = sum5;
1021     y[8*i+5] = sum6;
1022     y[8*i+6] = sum7;
1023     y[8*i+7] = sum8;
1024   }
1025 
1026   PetscLogFlops(16*a->nz - 8*m);
1027   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1028   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1034 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1035 {
1036   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1037   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1038   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1039   PetscErrorCode ierr;
1040   PetscInt       m = b->AIJ->m,n,i,*idx;
1041 
1042   PetscFunctionBegin;
1043   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1044   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1045   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1046 
1047   for (i=0; i<m; i++) {
1048     idx    = a->j + a->i[i] ;
1049     v      = a->a + a->i[i] ;
1050     n      = a->i[i+1] - a->i[i];
1051     alpha1 = x[8*i];
1052     alpha2 = x[8*i+1];
1053     alpha3 = x[8*i+2];
1054     alpha4 = x[8*i+3];
1055     alpha5 = x[8*i+4];
1056     alpha6 = x[8*i+5];
1057     alpha7 = x[8*i+6];
1058     alpha8 = x[8*i+7];
1059     while (n-->0) {
1060       y[8*(*idx)]   += alpha1*(*v);
1061       y[8*(*idx)+1] += alpha2*(*v);
1062       y[8*(*idx)+2] += alpha3*(*v);
1063       y[8*(*idx)+3] += alpha4*(*v);
1064       y[8*(*idx)+4] += alpha5*(*v);
1065       y[8*(*idx)+5] += alpha6*(*v);
1066       y[8*(*idx)+6] += alpha7*(*v);
1067       y[8*(*idx)+7] += alpha8*(*v);
1068       idx++; v++;
1069     }
1070   }
1071   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1079 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1080 {
1081   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1082   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1083   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1084   PetscErrorCode ierr;
1085   PetscInt       m = b->AIJ->m,*idx,*ii;
1086   PetscInt       n,i,jrow,j;
1087 
1088   PetscFunctionBegin;
1089   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1090   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1091   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1092   idx  = a->j;
1093   v    = a->a;
1094   ii   = a->i;
1095 
1096   for (i=0; i<m; i++) {
1097     jrow = ii[i];
1098     n    = ii[i+1] - jrow;
1099     sum1  = 0.0;
1100     sum2  = 0.0;
1101     sum3  = 0.0;
1102     sum4  = 0.0;
1103     sum5  = 0.0;
1104     sum6  = 0.0;
1105     sum7  = 0.0;
1106     sum8  = 0.0;
1107     for (j=0; j<n; j++) {
1108       sum1 += v[jrow]*x[8*idx[jrow]];
1109       sum2 += v[jrow]*x[8*idx[jrow]+1];
1110       sum3 += v[jrow]*x[8*idx[jrow]+2];
1111       sum4 += v[jrow]*x[8*idx[jrow]+3];
1112       sum5 += v[jrow]*x[8*idx[jrow]+4];
1113       sum6 += v[jrow]*x[8*idx[jrow]+5];
1114       sum7 += v[jrow]*x[8*idx[jrow]+6];
1115       sum8 += v[jrow]*x[8*idx[jrow]+7];
1116       jrow++;
1117      }
1118     y[8*i]   += sum1;
1119     y[8*i+1] += sum2;
1120     y[8*i+2] += sum3;
1121     y[8*i+3] += sum4;
1122     y[8*i+4] += sum5;
1123     y[8*i+5] += sum6;
1124     y[8*i+6] += sum7;
1125     y[8*i+7] += sum8;
1126   }
1127 
1128   PetscLogFlops(16*a->nz);
1129   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1130   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1136 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1137 {
1138   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1139   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1140   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1141   PetscErrorCode ierr;
1142   PetscInt       m = b->AIJ->m,n,i,*idx;
1143 
1144   PetscFunctionBegin;
1145   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1146   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1147   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1148   for (i=0; i<m; i++) {
1149     idx    = a->j + a->i[i] ;
1150     v      = a->a + a->i[i] ;
1151     n      = a->i[i+1] - a->i[i];
1152     alpha1 = x[8*i];
1153     alpha2 = x[8*i+1];
1154     alpha3 = x[8*i+2];
1155     alpha4 = x[8*i+3];
1156     alpha5 = x[8*i+4];
1157     alpha6 = x[8*i+5];
1158     alpha7 = x[8*i+6];
1159     alpha8 = x[8*i+7];
1160     while (n-->0) {
1161       y[8*(*idx)]   += alpha1*(*v);
1162       y[8*(*idx)+1] += alpha2*(*v);
1163       y[8*(*idx)+2] += alpha3*(*v);
1164       y[8*(*idx)+3] += alpha4*(*v);
1165       y[8*(*idx)+4] += alpha5*(*v);
1166       y[8*(*idx)+5] += alpha6*(*v);
1167       y[8*(*idx)+6] += alpha7*(*v);
1168       y[8*(*idx)+7] += alpha8*(*v);
1169       idx++; v++;
1170     }
1171   }
1172   PetscLogFlops(16*a->nz);
1173   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1174   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 /* ------------------------------------------------------------------------------*/
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1181 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1182 {
1183   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1184   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1185   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1186   PetscErrorCode ierr;
1187   PetscInt       m = b->AIJ->m,*idx,*ii;
1188   PetscInt       n,i,jrow,j;
1189 
1190   PetscFunctionBegin;
1191   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1192   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1193   idx  = a->j;
1194   v    = a->a;
1195   ii   = a->i;
1196 
1197   for (i=0; i<m; i++) {
1198     jrow = ii[i];
1199     n    = ii[i+1] - jrow;
1200     sum1  = 0.0;
1201     sum2  = 0.0;
1202     sum3  = 0.0;
1203     sum4  = 0.0;
1204     sum5  = 0.0;
1205     sum6  = 0.0;
1206     sum7  = 0.0;
1207     sum8  = 0.0;
1208     sum9  = 0.0;
1209     for (j=0; j<n; j++) {
1210       sum1 += v[jrow]*x[9*idx[jrow]];
1211       sum2 += v[jrow]*x[9*idx[jrow]+1];
1212       sum3 += v[jrow]*x[9*idx[jrow]+2];
1213       sum4 += v[jrow]*x[9*idx[jrow]+3];
1214       sum5 += v[jrow]*x[9*idx[jrow]+4];
1215       sum6 += v[jrow]*x[9*idx[jrow]+5];
1216       sum7 += v[jrow]*x[9*idx[jrow]+6];
1217       sum8 += v[jrow]*x[9*idx[jrow]+7];
1218       sum9 += v[jrow]*x[9*idx[jrow]+8];
1219       jrow++;
1220      }
1221     y[9*i]   = sum1;
1222     y[9*i+1] = sum2;
1223     y[9*i+2] = sum3;
1224     y[9*i+3] = sum4;
1225     y[9*i+4] = sum5;
1226     y[9*i+5] = sum6;
1227     y[9*i+6] = sum7;
1228     y[9*i+7] = sum8;
1229     y[9*i+8] = sum9;
1230   }
1231 
1232   PetscLogFlops(18*a->nz - 9*m);
1233   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1234   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1240 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1241 {
1242   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1243   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1244   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1245   PetscErrorCode ierr;
1246   PetscInt       m = b->AIJ->m,n,i,*idx;
1247 
1248   PetscFunctionBegin;
1249   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1250   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1251   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1252 
1253   for (i=0; i<m; i++) {
1254     idx    = a->j + a->i[i] ;
1255     v      = a->a + a->i[i] ;
1256     n      = a->i[i+1] - a->i[i];
1257     alpha1 = x[9*i];
1258     alpha2 = x[9*i+1];
1259     alpha3 = x[9*i+2];
1260     alpha4 = x[9*i+3];
1261     alpha5 = x[9*i+4];
1262     alpha6 = x[9*i+5];
1263     alpha7 = x[9*i+6];
1264     alpha8 = x[9*i+7];
1265     alpha9 = x[9*i+8];
1266     while (n-->0) {
1267       y[9*(*idx)]   += alpha1*(*v);
1268       y[9*(*idx)+1] += alpha2*(*v);
1269       y[9*(*idx)+2] += alpha3*(*v);
1270       y[9*(*idx)+3] += alpha4*(*v);
1271       y[9*(*idx)+4] += alpha5*(*v);
1272       y[9*(*idx)+5] += alpha6*(*v);
1273       y[9*(*idx)+6] += alpha7*(*v);
1274       y[9*(*idx)+7] += alpha8*(*v);
1275       y[9*(*idx)+8] += alpha9*(*v);
1276       idx++; v++;
1277     }
1278   }
1279   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1280   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1281   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1287 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1288 {
1289   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1290   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1291   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1292   PetscErrorCode ierr;
1293   PetscInt       m = b->AIJ->m,*idx,*ii;
1294   PetscInt       n,i,jrow,j;
1295 
1296   PetscFunctionBegin;
1297   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1298   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1299   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1300   idx  = a->j;
1301   v    = a->a;
1302   ii   = a->i;
1303 
1304   for (i=0; i<m; i++) {
1305     jrow = ii[i];
1306     n    = ii[i+1] - jrow;
1307     sum1  = 0.0;
1308     sum2  = 0.0;
1309     sum3  = 0.0;
1310     sum4  = 0.0;
1311     sum5  = 0.0;
1312     sum6  = 0.0;
1313     sum7  = 0.0;
1314     sum8  = 0.0;
1315     sum9  = 0.0;
1316     for (j=0; j<n; j++) {
1317       sum1 += v[jrow]*x[9*idx[jrow]];
1318       sum2 += v[jrow]*x[9*idx[jrow]+1];
1319       sum3 += v[jrow]*x[9*idx[jrow]+2];
1320       sum4 += v[jrow]*x[9*idx[jrow]+3];
1321       sum5 += v[jrow]*x[9*idx[jrow]+4];
1322       sum6 += v[jrow]*x[9*idx[jrow]+5];
1323       sum7 += v[jrow]*x[9*idx[jrow]+6];
1324       sum8 += v[jrow]*x[9*idx[jrow]+7];
1325       sum9 += v[jrow]*x[9*idx[jrow]+8];
1326       jrow++;
1327      }
1328     y[9*i]   += sum1;
1329     y[9*i+1] += sum2;
1330     y[9*i+2] += sum3;
1331     y[9*i+3] += sum4;
1332     y[9*i+4] += sum5;
1333     y[9*i+5] += sum6;
1334     y[9*i+6] += sum7;
1335     y[9*i+7] += sum8;
1336     y[9*i+8] += sum9;
1337   }
1338 
1339   PetscLogFlops(18*a->nz);
1340   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1341   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1347 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1348 {
1349   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1350   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1351   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1352   PetscErrorCode ierr;
1353   PetscInt       m = b->AIJ->m,n,i,*idx;
1354 
1355   PetscFunctionBegin;
1356   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1357   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1358   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1359   for (i=0; i<m; i++) {
1360     idx    = a->j + a->i[i] ;
1361     v      = a->a + a->i[i] ;
1362     n      = a->i[i+1] - a->i[i];
1363     alpha1 = x[9*i];
1364     alpha2 = x[9*i+1];
1365     alpha3 = x[9*i+2];
1366     alpha4 = x[9*i+3];
1367     alpha5 = x[9*i+4];
1368     alpha6 = x[9*i+5];
1369     alpha7 = x[9*i+6];
1370     alpha8 = x[9*i+7];
1371     alpha9 = x[9*i+8];
1372     while (n-->0) {
1373       y[9*(*idx)]   += alpha1*(*v);
1374       y[9*(*idx)+1] += alpha2*(*v);
1375       y[9*(*idx)+2] += alpha3*(*v);
1376       y[9*(*idx)+3] += alpha4*(*v);
1377       y[9*(*idx)+4] += alpha5*(*v);
1378       y[9*(*idx)+5] += alpha6*(*v);
1379       y[9*(*idx)+6] += alpha7*(*v);
1380       y[9*(*idx)+7] += alpha8*(*v);
1381       y[9*(*idx)+8] += alpha9*(*v);
1382       idx++; v++;
1383     }
1384   }
1385   PetscLogFlops(18*a->nz);
1386   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1387   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*--------------------------------------------------------------------------------------------*/
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1394 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1395 {
1396   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1397   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1398   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1399   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1400   PetscErrorCode ierr;
1401   PetscInt       m = b->AIJ->m,*idx,*ii;
1402   PetscInt       n,i,jrow,j;
1403 
1404   PetscFunctionBegin;
1405   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1406   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1407   idx  = a->j;
1408   v    = a->a;
1409   ii   = a->i;
1410 
1411   for (i=0; i<m; i++) {
1412     jrow = ii[i];
1413     n    = ii[i+1] - jrow;
1414     sum1  = 0.0;
1415     sum2  = 0.0;
1416     sum3  = 0.0;
1417     sum4  = 0.0;
1418     sum5  = 0.0;
1419     sum6  = 0.0;
1420     sum7  = 0.0;
1421     sum8  = 0.0;
1422     sum9  = 0.0;
1423     sum10 = 0.0;
1424     sum11 = 0.0;
1425     sum12 = 0.0;
1426     sum13 = 0.0;
1427     sum14 = 0.0;
1428     sum15 = 0.0;
1429     sum16 = 0.0;
1430     for (j=0; j<n; j++) {
1431       sum1  += v[jrow]*x[16*idx[jrow]];
1432       sum2  += v[jrow]*x[16*idx[jrow]+1];
1433       sum3  += v[jrow]*x[16*idx[jrow]+2];
1434       sum4  += v[jrow]*x[16*idx[jrow]+3];
1435       sum5  += v[jrow]*x[16*idx[jrow]+4];
1436       sum6  += v[jrow]*x[16*idx[jrow]+5];
1437       sum7  += v[jrow]*x[16*idx[jrow]+6];
1438       sum8  += v[jrow]*x[16*idx[jrow]+7];
1439       sum9  += v[jrow]*x[16*idx[jrow]+8];
1440       sum10 += v[jrow]*x[16*idx[jrow]+9];
1441       sum11 += v[jrow]*x[16*idx[jrow]+10];
1442       sum12 += v[jrow]*x[16*idx[jrow]+11];
1443       sum13 += v[jrow]*x[16*idx[jrow]+12];
1444       sum14 += v[jrow]*x[16*idx[jrow]+13];
1445       sum15 += v[jrow]*x[16*idx[jrow]+14];
1446       sum16 += v[jrow]*x[16*idx[jrow]+15];
1447       jrow++;
1448      }
1449     y[16*i]    = sum1;
1450     y[16*i+1]  = sum2;
1451     y[16*i+2]  = sum3;
1452     y[16*i+3]  = sum4;
1453     y[16*i+4]  = sum5;
1454     y[16*i+5]  = sum6;
1455     y[16*i+6]  = sum7;
1456     y[16*i+7]  = sum8;
1457     y[16*i+8]  = sum9;
1458     y[16*i+9]  = sum10;
1459     y[16*i+10] = sum11;
1460     y[16*i+11] = sum12;
1461     y[16*i+12] = sum13;
1462     y[16*i+13] = sum14;
1463     y[16*i+14] = sum15;
1464     y[16*i+15] = sum16;
1465   }
1466 
1467   PetscLogFlops(32*a->nz - 16*m);
1468   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1469   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1475 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1476 {
1477   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1478   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1479   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1480   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1481   PetscErrorCode ierr;
1482   PetscInt       m = b->AIJ->m,n,i,*idx;
1483 
1484   PetscFunctionBegin;
1485   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1486   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1487   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1488 
1489   for (i=0; i<m; i++) {
1490     idx    = a->j + a->i[i] ;
1491     v      = a->a + a->i[i] ;
1492     n      = a->i[i+1] - a->i[i];
1493     alpha1  = x[16*i];
1494     alpha2  = x[16*i+1];
1495     alpha3  = x[16*i+2];
1496     alpha4  = x[16*i+3];
1497     alpha5  = x[16*i+4];
1498     alpha6  = x[16*i+5];
1499     alpha7  = x[16*i+6];
1500     alpha8  = x[16*i+7];
1501     alpha9  = x[16*i+8];
1502     alpha10 = x[16*i+9];
1503     alpha11 = x[16*i+10];
1504     alpha12 = x[16*i+11];
1505     alpha13 = x[16*i+12];
1506     alpha14 = x[16*i+13];
1507     alpha15 = x[16*i+14];
1508     alpha16 = x[16*i+15];
1509     while (n-->0) {
1510       y[16*(*idx)]    += alpha1*(*v);
1511       y[16*(*idx)+1]  += alpha2*(*v);
1512       y[16*(*idx)+2]  += alpha3*(*v);
1513       y[16*(*idx)+3]  += alpha4*(*v);
1514       y[16*(*idx)+4]  += alpha5*(*v);
1515       y[16*(*idx)+5]  += alpha6*(*v);
1516       y[16*(*idx)+6]  += alpha7*(*v);
1517       y[16*(*idx)+7]  += alpha8*(*v);
1518       y[16*(*idx)+8]  += alpha9*(*v);
1519       y[16*(*idx)+9]  += alpha10*(*v);
1520       y[16*(*idx)+10] += alpha11*(*v);
1521       y[16*(*idx)+11] += alpha12*(*v);
1522       y[16*(*idx)+12] += alpha13*(*v);
1523       y[16*(*idx)+13] += alpha14*(*v);
1524       y[16*(*idx)+14] += alpha15*(*v);
1525       y[16*(*idx)+15] += alpha16*(*v);
1526       idx++; v++;
1527     }
1528   }
1529   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1530   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1531   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1537 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1538 {
1539   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1540   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1541   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1542   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1543   PetscErrorCode ierr;
1544   PetscInt       m = b->AIJ->m,*idx,*ii;
1545   PetscInt       n,i,jrow,j;
1546 
1547   PetscFunctionBegin;
1548   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1549   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1550   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1551   idx  = a->j;
1552   v    = a->a;
1553   ii   = a->i;
1554 
1555   for (i=0; i<m; i++) {
1556     jrow = ii[i];
1557     n    = ii[i+1] - jrow;
1558     sum1  = 0.0;
1559     sum2  = 0.0;
1560     sum3  = 0.0;
1561     sum4  = 0.0;
1562     sum5  = 0.0;
1563     sum6  = 0.0;
1564     sum7  = 0.0;
1565     sum8  = 0.0;
1566     sum9  = 0.0;
1567     sum10 = 0.0;
1568     sum11 = 0.0;
1569     sum12 = 0.0;
1570     sum13 = 0.0;
1571     sum14 = 0.0;
1572     sum15 = 0.0;
1573     sum16 = 0.0;
1574     for (j=0; j<n; j++) {
1575       sum1  += v[jrow]*x[16*idx[jrow]];
1576       sum2  += v[jrow]*x[16*idx[jrow]+1];
1577       sum3  += v[jrow]*x[16*idx[jrow]+2];
1578       sum4  += v[jrow]*x[16*idx[jrow]+3];
1579       sum5  += v[jrow]*x[16*idx[jrow]+4];
1580       sum6  += v[jrow]*x[16*idx[jrow]+5];
1581       sum7  += v[jrow]*x[16*idx[jrow]+6];
1582       sum8  += v[jrow]*x[16*idx[jrow]+7];
1583       sum9  += v[jrow]*x[16*idx[jrow]+8];
1584       sum10 += v[jrow]*x[16*idx[jrow]+9];
1585       sum11 += v[jrow]*x[16*idx[jrow]+10];
1586       sum12 += v[jrow]*x[16*idx[jrow]+11];
1587       sum13 += v[jrow]*x[16*idx[jrow]+12];
1588       sum14 += v[jrow]*x[16*idx[jrow]+13];
1589       sum15 += v[jrow]*x[16*idx[jrow]+14];
1590       sum16 += v[jrow]*x[16*idx[jrow]+15];
1591       jrow++;
1592      }
1593     y[16*i]    += sum1;
1594     y[16*i+1]  += sum2;
1595     y[16*i+2]  += sum3;
1596     y[16*i+3]  += sum4;
1597     y[16*i+4]  += sum5;
1598     y[16*i+5]  += sum6;
1599     y[16*i+6]  += sum7;
1600     y[16*i+7]  += sum8;
1601     y[16*i+8]  += sum9;
1602     y[16*i+9]  += sum10;
1603     y[16*i+10] += sum11;
1604     y[16*i+11] += sum12;
1605     y[16*i+12] += sum13;
1606     y[16*i+13] += sum14;
1607     y[16*i+14] += sum15;
1608     y[16*i+15] += sum16;
1609   }
1610 
1611   PetscLogFlops(32*a->nz);
1612   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1613   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1619 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1620 {
1621   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1622   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1623   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1624   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1625   PetscErrorCode ierr;
1626   PetscInt       m = b->AIJ->m,n,i,*idx;
1627 
1628   PetscFunctionBegin;
1629   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1630   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1631   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1632   for (i=0; i<m; i++) {
1633     idx    = a->j + a->i[i] ;
1634     v      = a->a + a->i[i] ;
1635     n      = a->i[i+1] - a->i[i];
1636     alpha1 = x[16*i];
1637     alpha2 = x[16*i+1];
1638     alpha3 = x[16*i+2];
1639     alpha4 = x[16*i+3];
1640     alpha5 = x[16*i+4];
1641     alpha6 = x[16*i+5];
1642     alpha7 = x[16*i+6];
1643     alpha8 = x[16*i+7];
1644     alpha9  = x[16*i+8];
1645     alpha10 = x[16*i+9];
1646     alpha11 = x[16*i+10];
1647     alpha12 = x[16*i+11];
1648     alpha13 = x[16*i+12];
1649     alpha14 = x[16*i+13];
1650     alpha15 = x[16*i+14];
1651     alpha16 = x[16*i+15];
1652     while (n-->0) {
1653       y[16*(*idx)]   += alpha1*(*v);
1654       y[16*(*idx)+1] += alpha2*(*v);
1655       y[16*(*idx)+2] += alpha3*(*v);
1656       y[16*(*idx)+3] += alpha4*(*v);
1657       y[16*(*idx)+4] += alpha5*(*v);
1658       y[16*(*idx)+5] += alpha6*(*v);
1659       y[16*(*idx)+6] += alpha7*(*v);
1660       y[16*(*idx)+7] += alpha8*(*v);
1661       y[16*(*idx)+8]  += alpha9*(*v);
1662       y[16*(*idx)+9]  += alpha10*(*v);
1663       y[16*(*idx)+10] += alpha11*(*v);
1664       y[16*(*idx)+11] += alpha12*(*v);
1665       y[16*(*idx)+12] += alpha13*(*v);
1666       y[16*(*idx)+13] += alpha14*(*v);
1667       y[16*(*idx)+14] += alpha15*(*v);
1668       y[16*(*idx)+15] += alpha16*(*v);
1669       idx++; v++;
1670     }
1671   }
1672   PetscLogFlops(32*a->nz);
1673   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1674   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 /*===================================================================================*/
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1681 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1682 {
1683   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1684   PetscErrorCode ierr;
1685 
1686   PetscFunctionBegin;
1687   /* start the scatter */
1688   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1689   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1690   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1691   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1697 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1698 {
1699   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1700   PetscErrorCode ierr;
1701 
1702   PetscFunctionBegin;
1703   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1704   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1705   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1706   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1712 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1713 {
1714   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   /* start the scatter */
1719   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1720   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1721   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1722   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1728 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1729 {
1730   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1735   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1736   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1737   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
1743 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
1744 {
1745   /* This routine requires testing -- but it's getting better. */
1746   PetscErrorCode ierr;
1747   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1748   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
1749   Mat            P=pp->AIJ;
1750   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
1751   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
1752   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
1753   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn;
1754   PetscInt       i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
1755   MatScalar      *ca;
1756 
1757   PetscFunctionBegin;
1758   /* Start timer */
1759   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
1760 
1761   /* Get ij structure of P^T */
1762   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
1763 
1764   cn = pn*ppdof;
1765   /* Allocate ci array, arrays for fill computation and */
1766   /* free space for accumulating nonzero column info */
1767   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1768   ci[0] = 0;
1769 
1770   /* Work arrays for rows of P^T*A */
1771   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
1772   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1773   ptasparserow = ptadenserow  + an;
1774   denserow     = ptasparserow + an;
1775   sparserow    = denserow     + cn;
1776 
1777   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
1778   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1779   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
1780   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
1781   current_space = free_space;
1782 
1783   /* Determine symbolic info for each row of C: */
1784   for (i=0;i<pn;i++) {
1785     ptnzi  = pti[i+1] - pti[i];
1786     ptJ    = ptj + pti[i];
1787     for (dof=0;dof<ppdof;dof++) {
1788       ptanzi = 0;
1789       /* Determine symbolic row of PtA: */
1790       for (j=0;j<ptnzi;j++) {
1791         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
1792         arow = ptJ[j]*ppdof + dof;
1793         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
1794         anzj = ai[arow+1] - ai[arow];
1795         ajj  = aj + ai[arow];
1796         for (k=0;k<anzj;k++) {
1797           if (!ptadenserow[ajj[k]]) {
1798             ptadenserow[ajj[k]]    = -1;
1799             ptasparserow[ptanzi++] = ajj[k];
1800           }
1801         }
1802       }
1803       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
1804       ptaj = ptasparserow;
1805       cnzi   = 0;
1806       for (j=0;j<ptanzi;j++) {
1807         /* Get offset within block of P */
1808         pshift = *ptaj%ppdof;
1809         /* Get block row of P */
1810         prow = (*ptaj++)/ppdof; /* integer division */
1811         /* P has same number of nonzeros per row as the compressed form */
1812         pnzj = pi[prow+1] - pi[prow];
1813         pjj  = pj + pi[prow];
1814         for (k=0;k<pnzj;k++) {
1815           /* Locations in C are shifted by the offset within the block */
1816           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
1817           if (!denserow[pjj[k]*ppdof+pshift]) {
1818             denserow[pjj[k]*ppdof+pshift] = -1;
1819             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
1820           }
1821         }
1822       }
1823 
1824       /* sort sparserow */
1825       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
1826 
1827       /* If free space is not available, make more free space */
1828       /* Double the amount of total space in the list */
1829       if (current_space->local_remaining<cnzi) {
1830         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1831       }
1832 
1833       /* Copy data into free space, and zero out denserows */
1834       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
1835       current_space->array           += cnzi;
1836       current_space->local_used      += cnzi;
1837       current_space->local_remaining -= cnzi;
1838 
1839       for (j=0;j<ptanzi;j++) {
1840         ptadenserow[ptasparserow[j]] = 0;
1841       }
1842       for (j=0;j<cnzi;j++) {
1843         denserow[sparserow[j]] = 0;
1844       }
1845       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1846       /*        For now, we will recompute what is needed. */
1847       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
1848     }
1849   }
1850   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1851   /* Allocate space for cj, initialize cj, and */
1852   /* destroy list of free space and other temporary array(s) */
1853   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1854   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1855   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
1856 
1857   /* Allocate space for ca */
1858   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1859   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1860 
1861   /* put together the new matrix */
1862   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
1863 
1864   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1865   /* Since these are PETSc arrays, change flags to free them as necessary. */
1866   c = (Mat_SeqAIJ *)((*C)->data);
1867   c->freedata = PETSC_TRUE;
1868   c->nonew    = 0;
1869 
1870   /* Clean up. */
1871   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
1872 
1873   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
1879 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
1880 {
1881   /* This routine requires testing -- first draft only */
1882   PetscErrorCode ierr;
1883   PetscInt       flops=0;
1884   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
1885   Mat            P=pp->AIJ;
1886   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
1887   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
1888   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
1889   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
1890   PetscInt       *ci=c->i,*cj=c->j,*cjj;
1891   PetscInt       am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof;
1892   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
1893   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
1894 
1895   PetscFunctionBegin;
1896   /* Allocate temporary array for storage of one row of A*P */
1897   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
1898   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1899 
1900   apj      = (PetscInt *)(apa + cn);
1901   apjdense = apj + cn;
1902 
1903   /* Clear old values in C */
1904   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
1905 
1906   for (i=0;i<am;i++) {
1907     /* Form sparse row of A*P */
1908     anzi  = ai[i+1] - ai[i];
1909     apnzj = 0;
1910     for (j=0;j<anzi;j++) {
1911       /* Get offset within block of P */
1912       pshift = *aj%ppdof;
1913       /* Get block row of P */
1914       prow   = *aj++/ppdof; /* integer division */
1915       pnzj = pi[prow+1] - pi[prow];
1916       pjj  = pj + pi[prow];
1917       paj  = pa + pi[prow];
1918       for (k=0;k<pnzj;k++) {
1919         poffset = pjj[k]*ppdof+pshift;
1920         if (!apjdense[poffset]) {
1921           apjdense[poffset] = -1;
1922           apj[apnzj++]      = poffset;
1923         }
1924         apa[poffset] += (*aa)*paj[k];
1925       }
1926       flops += 2*pnzj;
1927       aa++;
1928     }
1929 
1930     /* Sort the j index array for quick sparse axpy. */
1931     /* Note: a array does not need sorting as it is in dense storage locations. */
1932     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1933 
1934     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
1935     prow    = i/ppdof; /* integer division */
1936     pshift  = i%ppdof;
1937     poffset = pi[prow];
1938     pnzi = pi[prow+1] - poffset;
1939     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
1940     pJ   = pj+poffset;
1941     pA   = pa+poffset;
1942     for (j=0;j<pnzi;j++) {
1943       crow   = (*pJ)*ppdof+pshift;
1944       cjj    = cj + ci[crow];
1945       caj    = ca + ci[crow];
1946       pJ++;
1947       /* Perform sparse axpy operation.  Note cjj includes apj. */
1948       for (k=0,nextap=0;nextap<apnzj;k++) {
1949         if (cjj[k]==apj[nextap]) {
1950           caj[k] += (*pA)*apa[apj[nextap++]];
1951         }
1952       }
1953       flops += 2*apnzj;
1954       pA++;
1955     }
1956 
1957     /* Zero the current row info for A*P */
1958     for (j=0;j<apnzj;j++) {
1959       apa[apj[j]]      = 0.;
1960       apjdense[apj[j]] = 0;
1961     }
1962   }
1963 
1964   /* Assemble the final matrix and clean up */
1965   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1966   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967   ierr = PetscFree(apa);CHKERRQ(ierr);
1968   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1969 
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
1975 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
1976 {
1977   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978   Mat               a = b->AIJ,B;
1979   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
1980   PetscErrorCode    ierr;
1981   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
1982   PetscInt          *cols;
1983   PetscScalar       *vals;
1984 
1985   PetscFunctionBegin;
1986   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
1987   ierr = PetscMalloc(dof*m*sizeof(int),&ilen);CHKERRQ(ierr);
1988   for (i=0; i<m; i++) {
1989     nmax = PetscMax(nmax,aij->ilen[i]);
1990     for (j=0; j<dof; j++) {
1991       ilen[dof*i+j] = aij->ilen[i];
1992     }
1993   }
1994   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
1995   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1996   ierr = PetscFree(ilen);CHKERRQ(ierr);
1997   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
1998   ii   = 0;
1999   for (i=0; i<m; i++) {
2000     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2001     for (j=0; j<dof; j++) {
2002       for (k=0; k<ncols; k++) {
2003         icols[k] = dof*cols[k]+j;
2004       }
2005       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2006       ii++;
2007     }
2008     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2009   }
2010   ierr = PetscFree(icols);CHKERRQ(ierr);
2011   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2012   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013 
2014   if (reuse == MAT_REUSE_MATRIX) {
2015     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
2016   } else {
2017     *newmat = B;
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #include "src/mat/impls/aij/mpi/mpiaij.h"
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2025 PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
2026 {
2027   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2028   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2029   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2030   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2031   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2032   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2033   PetscInt          dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
2034   PetscInt          rstart,cstart,*garray,ii,k;
2035   PetscErrorCode    ierr;
2036   PetscScalar       *vals,*ovals;
2037 
2038   PetscFunctionBegin;
2039   ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr);
2040   for (i=0; i<A->m/dof; i++) {
2041     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2042     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2043     for (j=0; j<dof; j++) {
2044       dnz[dof*i+j] = AIJ->ilen[i];
2045       onz[dof*i+j] = OAIJ->ilen[i];
2046     }
2047   }
2048   ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2049   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2050   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2051 
2052   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2053   rstart = dof*mpiaij->rstart;
2054   cstart = dof*mpiaij->cstart;
2055   garray = mpiaij->garray;
2056 
2057   ii = rstart;
2058   for (i=0; i<A->m/dof; i++) {
2059     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2060     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2061     for (j=0; j<dof; j++) {
2062       for (k=0; k<ncols; k++) {
2063         icols[k] = cstart + dof*cols[k]+j;
2064       }
2065       for (k=0; k<oncols; k++) {
2066         oicols[k] = dof*garray[ocols[k]]+j;
2067       }
2068       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2069       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2070       ii++;
2071     }
2072     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2073     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2074   }
2075   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079 
2080   if (reuse == MAT_REUSE_MATRIX) {
2081     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
2082   } else {
2083     *newmat = B;
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 
2089 
2090 /* ---------------------------------------------------------------------------------- */
2091 /*MC
2092   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2093   operations for multicomponent problems.  It interpolates each component the same
2094   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2095   and MATMPIAIJ for distributed matrices.
2096 
2097   Operations provided:
2098 + MatMult
2099 . MatMultTranspose
2100 . MatMultAdd
2101 . MatMultTransposeAdd
2102 - MatView
2103 
2104   Level: advanced
2105 
2106 M*/
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatCreateMAIJ"
2109 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2110 {
2111   PetscErrorCode ierr;
2112   PetscMPIInt    size;
2113   PetscInt       n;
2114   Mat_MPIMAIJ    *b;
2115   Mat            B;
2116 
2117   PetscFunctionBegin;
2118   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2119 
2120   if (dof == 1) {
2121     *maij = A;
2122   } else {
2123     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
2124     B->assembled    = PETSC_TRUE;
2125 
2126     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2127     if (size == 1) {
2128       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2129       B->ops->destroy = MatDestroy_SeqMAIJ;
2130       B->ops->view    = MatView_SeqMAIJ;
2131       b      = (Mat_MPIMAIJ*)B->data;
2132       b->dof = dof;
2133       b->AIJ = A;
2134       if (dof == 2) {
2135         B->ops->mult             = MatMult_SeqMAIJ_2;
2136         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2137         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2138         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2139       } else if (dof == 3) {
2140         B->ops->mult             = MatMult_SeqMAIJ_3;
2141         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2142         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2143         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2144       } else if (dof == 4) {
2145         B->ops->mult             = MatMult_SeqMAIJ_4;
2146         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2147         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2148         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2149       } else if (dof == 5) {
2150         B->ops->mult             = MatMult_SeqMAIJ_5;
2151         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2152         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2153         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2154       } else if (dof == 6) {
2155         B->ops->mult             = MatMult_SeqMAIJ_6;
2156         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2157         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2158         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2159       } else if (dof == 8) {
2160         B->ops->mult             = MatMult_SeqMAIJ_8;
2161         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2162         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2163         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2164       } else if (dof == 9) {
2165         B->ops->mult             = MatMult_SeqMAIJ_9;
2166         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2167         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2168         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2169       } else if (dof == 16) {
2170         B->ops->mult             = MatMult_SeqMAIJ_16;
2171         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2172         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2173         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2174       } else {
2175         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2176       }
2177       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2178       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2179       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2180     } else {
2181       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2182       IS         from,to;
2183       Vec        gvec;
2184       PetscInt   *garray,i;
2185 
2186       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2187       B->ops->destroy = MatDestroy_MPIMAIJ;
2188       B->ops->view    = MatView_MPIMAIJ;
2189       b      = (Mat_MPIMAIJ*)B->data;
2190       b->dof = dof;
2191       b->A   = A;
2192       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
2193       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
2194 
2195       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2196       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
2197       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2198 
2199       /* create two temporary Index sets for build scatter gather */
2200       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2201       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2202       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2203       ierr = PetscFree(garray);CHKERRQ(ierr);
2204       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2205 
2206       /* create temporary global vector to generate scatter context */
2207       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
2208       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2209 
2210       /* generate the scatter context */
2211       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2212 
2213       ierr = ISDestroy(from);CHKERRQ(ierr);
2214       ierr = ISDestroy(to);CHKERRQ(ierr);
2215       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2216 
2217       B->ops->mult             = MatMult_MPIMAIJ_dof;
2218       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2219       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2220       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2221       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2222     }
2223     *maij = B;
2224     ierr = MatView_Private(B);CHKERRQ(ierr);
2225   }
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 
2230 
2231 
2232 
2233 
2234 
2235 
2236 
2237 
2238 
2239 
2240