xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 047240e14af00aad1ef65e96f6fface8924f7f7e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,x1,s1;
133   const PetscScalar *b;
134   const MatScalar   *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode    ierr;
136   PetscInt          m = a->mbs,i,i2,nz,j;
137   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0]   = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v  = aa + ai[i];
162         vi = aj + ai[i];
163         nz = diag[i] - ai[i];
164         s1 = b[i2];
165         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
166         x[i2]  = idiag[0]*s1;
167         idiag += 1;
168         i2    += 1;
169       }
170       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
171       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
172     }
173     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
174         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
175       i2    = 0;
176       mdiag = a->idiag+a->mbs;
177       for (i=0; i<m; i++) {
178         x1     = x[i2];
179         x[i2]  = mdiag[0]*x1;
180         mdiag += 1;
181         i2    += 1;
182       }
183       ierr = PetscLogFlops(m);CHKERRQ(ierr);
184     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
185       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
186     }
187     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
188       idiag  = a->idiag+a->mbs - 1;
189       i2     = m - 1;
190       x1     = x[i2];
191       x[i2]  = idiag[0]*x1;
192       idiag -= 1;
193       i2    -= 1;
194       for (i=m-2; i>=0; i--) {
195         v  = aa + (diag[i]+1);
196         vi = aj + diag[i] + 1;
197         nz = ai[i+1] - diag[i] - 1;
198         s1 = x[i2];
199         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
200         x[i2]  = idiag[0]*s1;
201         idiag -= 1;
202         i2    -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215 {
216   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
217   PetscScalar       *x,x1,x2,s1,s2;
218   const PetscScalar *b;
219   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
220   PetscErrorCode    ierr;
221   PetscInt          m = a->mbs,i,i2,nz,idx,j,it;
222   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
223 
224   PetscFunctionBegin;
225   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
226   its = its*lits;
227   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
228   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
229   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
230   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
231   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
232 
233   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
234 
235   diag  = a->diag;
236   idiag = a->idiag;
237   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
238   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
239 
240   if (flag & SOR_ZERO_INITIAL_GUESS) {
241     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
242       x[0]   = b[0]*idiag[0] + b[1]*idiag[2];
243       x[1]   = b[0]*idiag[1] + b[1]*idiag[3];
244       i2     = 2;
245       idiag += 4;
246       for (i=1; i<m; i++) {
247         v  = aa + 4*ai[i];
248         vi = aj + ai[i];
249         nz = diag[i] - ai[i];
250         s1 = b[i2]; s2 = b[i2+1];
251         for (j=0; j<nz; j++) {
252           idx = 2*vi[j];
253           it  = 4*j;
254           x1  = x[idx]; x2 = x[1+idx];
255           s1 -= v[it]*x1 + v[it+2]*x2;
256           s2 -= v[it+1]*x1 + v[it+3]*x2;
257         }
258         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
259         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
260         idiag  += 4;
261         i2     += 2;
262       }
263       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
264       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
265     }
266     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
267         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
268       i2    = 0;
269       mdiag = a->idiag+4*a->mbs;
270       for (i=0; i<m; i++) {
271         x1      = x[i2]; x2 = x[i2+1];
272         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
273         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
274         mdiag  += 4;
275         i2     += 2;
276       }
277       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
278     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
279       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
280     }
281     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
282       idiag   = a->idiag+4*a->mbs - 4;
283       i2      = 2*m - 2;
284       x1      = x[i2]; x2 = x[i2+1];
285       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
286       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
287       idiag  -= 4;
288       i2     -= 2;
289       for (i=m-2; i>=0; i--) {
290         v  = aa + 4*(diag[i]+1);
291         vi = aj + diag[i] + 1;
292         nz = ai[i+1] - diag[i] - 1;
293         s1 = x[i2]; s2 = x[i2+1];
294         for (j=0; j<nz; j++) {
295           idx = 2*vi[j];
296           it  = 4*j;
297           x1  = x[idx]; x2 = x[1+idx];
298           s1 -= v[it]*x1 + v[it+2]*x2;
299           s2 -= v[it+1]*x1 + v[it+3]*x2;
300         }
301         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
302         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
303         idiag  -= 4;
304         i2     -= 2;
305       }
306       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
307     }
308   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
317 {
318   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
319   PetscScalar       *x,x1,x2,x3,s1,s2,s3;
320   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
321   const PetscScalar *b;
322   PetscErrorCode    ierr;
323   PetscInt          m = a->mbs,i,i2,nz,idx;
324   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
325 
326   PetscFunctionBegin;
327   its = its*lits;
328   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
329   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
334 
335   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
336 
337   diag  = a->diag;
338   idiag = a->idiag;
339   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
341 
342   if (flag & SOR_ZERO_INITIAL_GUESS) {
343     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
344       x[0]   = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345       x[1]   = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346       x[2]   = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347       i2     = 3;
348       idiag += 9;
349       for (i=1; i<m; i++) {
350         v  = aa + 9*ai[i];
351         vi = aj + ai[i];
352         nz = diag[i] - ai[i];
353         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354         while (nz--) {
355           idx = 3*(*vi++);
356           x1  = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360           v  += 9;
361         }
362         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365         idiag  += 9;
366         i2     += 3;
367       }
368       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
370     }
371     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373       i2    = 0;
374       mdiag = a->idiag+9*a->mbs;
375       for (i=0; i<m; i++) {
376         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380         mdiag  += 9;
381         i2     += 3;
382       }
383       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
384     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
386     }
387     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
388       idiag   = a->idiag+9*a->mbs - 9;
389       i2      = 3*m - 3;
390       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394       idiag  -= 9;
395       i2     -= 3;
396       for (i=m-2; i>=0; i--) {
397         v  = aa + 9*(diag[i]+1);
398         vi = aj + diag[i] + 1;
399         nz = ai[i+1] - diag[i] - 1;
400         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401         while (nz--) {
402           idx = 3*(*vi++);
403           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407           v  += 9;
408         }
409         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412         idiag  -= 9;
413         i2     -= 3;
414       }
415       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
416     }
417   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
418   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
419   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
426 {
427   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
428   PetscScalar       *x,x1,x2,x3,x4,s1,s2,s3,s4;
429   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
430   const PetscScalar *b;
431   PetscErrorCode    ierr;
432   PetscInt          m = a->mbs,i,i2,nz,idx;
433   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
434 
435   PetscFunctionBegin;
436   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
437   its = its*lits;
438   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
439   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
440   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
441   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
442   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
443 
444   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
445 
446   diag  = a->diag;
447   idiag = a->idiag;
448   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
449   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
450 
451   if (flag & SOR_ZERO_INITIAL_GUESS) {
452     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
453       x[0]   = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
454       x[1]   = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
455       x[2]   = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
456       x[3]   = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
457       i2     = 4;
458       idiag += 16;
459       for (i=1; i<m; i++) {
460         v  = aa + 16*ai[i];
461         vi = aj + ai[i];
462         nz = diag[i] - ai[i];
463         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
464         while (nz--) {
465           idx = 4*(*vi++);
466           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
467           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
468           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
469           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
470           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
471           v  += 16;
472         }
473         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
474         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
475         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
476         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
477         idiag  += 16;
478         i2     += 4;
479       }
480       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
481       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
482     }
483     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
484         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
485       i2    = 0;
486       mdiag = a->idiag+16*a->mbs;
487       for (i=0; i<m; i++) {
488         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
489         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
490         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
491         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
492         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
493         mdiag  += 16;
494         i2     += 4;
495       }
496       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
497     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
498       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
499     }
500     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
501       idiag   = a->idiag+16*a->mbs - 16;
502       i2      = 4*m - 4;
503       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
504       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
505       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
506       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
507       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
508       idiag  -= 16;
509       i2     -= 4;
510       for (i=m-2; i>=0; i--) {
511         v  = aa + 16*(diag[i]+1);
512         vi = aj + diag[i] + 1;
513         nz = ai[i+1] - diag[i] - 1;
514         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
515         while (nz--) {
516           idx = 4*(*vi++);
517           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
518           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
519           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
520           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
521           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
522           v  += 16;
523         }
524         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
525         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
526         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
527         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
528         idiag  -= 16;
529         i2     -= 4;
530       }
531       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
532     }
533   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
534   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   PetscScalar       *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
545   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
546   const PetscScalar *b;
547   PetscErrorCode    ierr;
548   PetscInt          m = a->mbs,i,i2,nz,idx;
549   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
550 
551   PetscFunctionBegin;
552   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
553   its = its*lits;
554   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
555   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
556   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
557   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
558   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
559 
560   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
561 
562   diag  = a->diag;
563   idiag = a->idiag;
564   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
565   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
566 
567   if (flag & SOR_ZERO_INITIAL_GUESS) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       x[0]   = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
570       x[1]   = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
571       x[2]   = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
572       x[3]   = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
573       x[4]   = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
574       i2     = 5;
575       idiag += 25;
576       for (i=1; i<m; i++) {
577         v  = aa + 25*ai[i];
578         vi = aj + ai[i];
579         nz = diag[i] - ai[i];
580         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
581         while (nz--) {
582           idx = 5*(*vi++);
583           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
584           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
585           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
586           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
587           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
588           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
589           v  += 25;
590         }
591         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
592         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
593         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
594         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
595         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
596         idiag  += 25;
597         i2     += 5;
598       }
599       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
600       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
601     }
602     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
603         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
604       i2    = 0;
605       mdiag = a->idiag+25*a->mbs;
606       for (i=0; i<m; i++) {
607         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
608         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
609         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
610         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
611         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
612         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
613         mdiag  += 25;
614         i2     += 5;
615       }
616       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
617     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
618       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
619     }
620     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
621       idiag   = a->idiag+25*a->mbs - 25;
622       i2      = 5*m - 5;
623       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
624       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
625       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
626       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
627       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
628       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
629       idiag  -= 25;
630       i2     -= 5;
631       for (i=m-2; i>=0; i--) {
632         v  = aa + 25*(diag[i]+1);
633         vi = aj + diag[i] + 1;
634         nz = ai[i+1] - diag[i] - 1;
635         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
636         while (nz--) {
637           idx = 5*(*vi++);
638           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
639           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
640           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
641           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
642           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
643           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
644           v  += 25;
645         }
646         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
647         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
648         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
649         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
650         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
651         idiag  -= 25;
652         i2     -= 5;
653       }
654       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
655     }
656   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
665 {
666   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
667   PetscScalar       *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
668   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
669   const PetscScalar *b;
670   PetscErrorCode    ierr;
671   PetscInt          m = a->mbs,i,i2,nz,idx;
672   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
673 
674   PetscFunctionBegin;
675   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
676   its = its*lits;
677   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
678   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
679   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
680   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
681   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
682 
683   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
684 
685   diag  = a->diag;
686   idiag = a->idiag;
687   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
688   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
689 
690   if (flag & SOR_ZERO_INITIAL_GUESS) {
691     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
692       x[0]   = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
693       x[1]   = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
694       x[2]   = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
695       x[3]   = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
696       x[4]   = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
697       x[5]   = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
698       i2     = 6;
699       idiag += 36;
700       for (i=1; i<m; i++) {
701         v  = aa + 36*ai[i];
702         vi = aj + ai[i];
703         nz = diag[i] - ai[i];
704         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
705         while (nz--) {
706           idx = 6*(*vi++);
707           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
708           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
709           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
710           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
711           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
712           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
713           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
714           v  += 36;
715         }
716         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
717         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
718         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
719         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
720         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
721         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
722         idiag  += 36;
723         i2     += 6;
724       }
725       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
726       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
727     }
728     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
729         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
730       i2    = 0;
731       mdiag = a->idiag+36*a->mbs;
732       for (i=0; i<m; i++) {
733         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
734         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
735         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
736         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
737         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
738         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
739         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
740         mdiag  += 36;
741         i2     += 6;
742       }
743       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
744     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
745       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
746     }
747     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748       idiag   = a->idiag+36*a->mbs - 36;
749       i2      = 6*m - 6;
750       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
751       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
752       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
753       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
754       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
755       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
756       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
757       idiag  -= 36;
758       i2     -= 6;
759       for (i=m-2; i>=0; i--) {
760         v  = aa + 36*(diag[i]+1);
761         vi = aj + diag[i] + 1;
762         nz = ai[i+1] - diag[i] - 1;
763         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
764         while (nz--) {
765           idx = 6*(*vi++);
766           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
767           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
768           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
769           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
770           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
771           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
772           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
773           v  += 36;
774         }
775         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
776         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
777         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
778         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
779         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
780         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
781         idiag  -= 36;
782         i2     -= 6;
783       }
784       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
785     }
786   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
787   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
788   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
795 {
796   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
797   PetscScalar       *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
798   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
799   const PetscScalar *b;
800   PetscErrorCode    ierr;
801   PetscInt          m = a->mbs,i,i2,nz,idx;
802   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
803 
804   PetscFunctionBegin;
805   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
806   its = its*lits;
807   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
808   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
809   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
810   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
811   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
812 
813   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
814 
815   diag  = a->diag;
816   idiag = a->idiag;
817   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
818   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
819 
820   if (flag & SOR_ZERO_INITIAL_GUESS) {
821     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
822       x[0]   = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
823       x[1]   = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
824       x[2]   = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
825       x[3]   = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
826       x[4]   = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
827       x[5]   = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
828       x[6]   = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
829       i2     = 7;
830       idiag += 49;
831       for (i=1; i<m; i++) {
832         v  = aa + 49*ai[i];
833         vi = aj + ai[i];
834         nz = diag[i] - ai[i];
835         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
836         while (nz--) {
837           idx = 7*(*vi++);
838           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
839           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
840           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
841           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
842           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
843           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
844           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
845           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
846           v  += 49;
847         }
848         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
849         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
850         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
851         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
852         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
853         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
854         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
855         idiag  += 49;
856         i2     += 7;
857       }
858       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
859       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
860     }
861     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
862         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
863       i2    = 0;
864       mdiag = a->idiag+49*a->mbs;
865       for (i=0; i<m; i++) {
866         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
867         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
868         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
869         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
870         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
871         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
872         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
873         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
874         mdiag  += 49;
875         i2     += 7;
876       }
877       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
878     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
879       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
880     }
881     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882       idiag   = a->idiag+49*a->mbs - 49;
883       i2      = 7*m - 7;
884       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
885       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
886       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
887       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
888       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
889       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
890       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
891       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
892       idiag  -= 49;
893       i2     -= 7;
894       for (i=m-2; i>=0; i--) {
895         v  = aa + 49*(diag[i]+1);
896         vi = aj + diag[i] + 1;
897         nz = ai[i+1] - diag[i] - 1;
898         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
899         while (nz--) {
900           idx = 7*(*vi++);
901           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
902           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
903           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
904           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
905           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
906           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
907           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
908           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
909           v  += 49;
910         }
911         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
912         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
913         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
914         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
915         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
916         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
917         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
918         idiag  -= 49;
919         i2     -= 7;
920       }
921       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
922     }
923   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
932 {
933   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
934   PetscScalar       *x,*work,*w,*workt;
935   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
936   const PetscScalar *b;
937   PetscErrorCode    ierr;
938   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
939   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
940 
941   PetscFunctionBegin;
942   its = its*lits;
943   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
944   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
945   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
946   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
947   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
948   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
949 
950   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
951 
952   diag  = a->diag;
953   idiag = a->idiag;
954   if (!a->mult_work) {
955     k    = PetscMax(A->rmap->n,A->cmap->n);
956     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
957   }
958   work = a->mult_work;
959   if (!a->sor_work) {
960     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
961   }
962   w = a->sor_work;
963 
964   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
966 
967   if (flag & SOR_ZERO_INITIAL_GUESS) {
968     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
969       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
970       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
971       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
972       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
973       i2     = bs;
974       idiag += bs2;
975       for (i=1; i<m; i++) {
976         v  = aa + bs2*ai[i];
977         vi = aj + ai[i];
978         nz = diag[i] - ai[i];
979 
980         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
981         /* copy all rows of x that are needed into contiguous space */
982         workt = work;
983         for (j=0; j<nz; j++) {
984           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
985           workt += bs;
986         }
987         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
988         /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
989          while (nz--) {
990            idx  = N*(*vi++);
991            x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
992            s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
993            s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
994            s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
995            v   += N2;
996            } */
997 
998         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
999         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1000         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1001         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1002 
1003         idiag += bs2;
1004         i2    += bs;
1005       }
1006       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1007       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1008     }
1009     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1010         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1011       i2    = 0;
1012       mdiag = a->idiag+bs2*a->mbs;
1013       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1014       for (i=0; i<m; i++) {
1015         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1016         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1017         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1018         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1019         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1020 
1021         mdiag += bs2;
1022         i2    += bs;
1023       }
1024       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1025     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1026       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1027     }
1028     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029       idiag = a->idiag+bs2*a->mbs - bs2;
1030       i2    = bs*m - bs;
1031       ierr  = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1033       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1034       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1035       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1036       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1037       idiag -= bs2;
1038       i2    -= bs;
1039       for (i=m-2; i>=0; i--) {
1040         v  = aa + bs2*(diag[i]+1);
1041         vi = aj + diag[i] + 1;
1042         nz = ai[i+1] - diag[i] - 1;
1043 
1044         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1045         /* copy all rows of x that are needed into contiguous space */
1046         workt = work;
1047         for (j=0; j<nz; j++) {
1048           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049           workt += bs;
1050         }
1051         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1052         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1053         while (nz--) {
1054           idx  = N*(*vi++);
1055           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1056           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1057           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1058           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1059           v   += N2;
1060           } */
1061 
1062         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1063         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1064         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1065         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1066         idiag -= bs2;
1067         i2    -= bs;
1068       }
1069       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1070     }
1071   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /*
1078     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1079 */
1080 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1083 #define matsetvaluesblocked4_ matsetvaluesblocked4
1084 #endif
1085 
1086 EXTERN_C_BEGIN
1087 #undef __FUNCT__
1088 #define __FUNCT__ "matsetvaluesblocked4_"
1089 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1090 {
1091   Mat               A  = *AA;
1092   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1093   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1094   PetscInt          *ai    =a->i,*ailen=a->ilen;
1095   PetscInt          *aj    =a->j,stepval,lastcol = -1;
1096   const PetscScalar *value = v;
1097   MatScalar         *ap,*aa = a->a,*bap;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1101   stepval = (n-1)*4;
1102   for (k=0; k<m; k++) { /* loop over added rows */
1103     row  = im[k];
1104     rp   = aj + ai[row];
1105     ap   = aa + 16*ai[row];
1106     nrow = ailen[row];
1107     low  = 0;
1108     high = nrow;
1109     for (l=0; l<n; l++) { /* loop over added columns */
1110       col = in[l];
1111       if (col <= lastcol)  low = 0;
1112       else                high = nrow;
1113       lastcol = col;
1114       value   = v + k*(stepval+4 + l)*4;
1115       while (high-low > 7) {
1116         t = (low+high)/2;
1117         if (rp[t] > col) high = t;
1118         else             low  = t;
1119       }
1120       for (i=low; i<high; i++) {
1121         if (rp[i] > col) break;
1122         if (rp[i] == col) {
1123           bap = ap +  16*i;
1124           for (ii=0; ii<4; ii++,value+=stepval) {
1125             for (jj=ii; jj<16; jj+=4) {
1126               bap[jj] += *value++;
1127             }
1128           }
1129           goto noinsert2;
1130         }
1131       }
1132       N = nrow++ - 1;
1133       high++; /* added new column index thus must search to one higher than before */
1134       /* shift up all the later entries in this row */
1135       for (ii=N; ii>=i; ii--) {
1136         rp[ii+1] = rp[ii];
1137         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1138       }
1139       if (N >= i) {
1140         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1141       }
1142       rp[i] = col;
1143       bap   = ap +  16*i;
1144       for (ii=0; ii<4; ii++,value+=stepval) {
1145         for (jj=ii; jj<16; jj+=4) {
1146           bap[jj] = *value++;
1147         }
1148       }
1149       noinsert2:;
1150       low = i;
1151     }
1152     ailen[row] = nrow;
1153   }
1154   PetscFunctionReturnVoid();
1155 }
1156 EXTERN_C_END
1157 
1158 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1159 #define matsetvalues4_ MATSETVALUES4
1160 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1161 #define matsetvalues4_ matsetvalues4
1162 #endif
1163 
1164 EXTERN_C_BEGIN
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatSetValues4_"
1167 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1168 {
1169   Mat         A  = *AA;
1170   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1171   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1172   PetscInt    *ai=a->i,*ailen=a->ilen;
1173   PetscInt    *aj=a->j,brow,bcol;
1174   PetscInt    ridx,cidx,lastcol = -1;
1175   MatScalar   *ap,value,*aa=a->a,*bap;
1176 
1177   PetscFunctionBegin;
1178   for (k=0; k<m; k++) { /* loop over added rows */
1179     row  = im[k]; brow = row/4;
1180     rp   = aj + ai[brow];
1181     ap   = aa + 16*ai[brow];
1182     nrow = ailen[brow];
1183     low  = 0;
1184     high = nrow;
1185     for (l=0; l<n; l++) { /* loop over added columns */
1186       col   = in[l]; bcol = col/4;
1187       ridx  = row % 4; cidx = col % 4;
1188       value = v[l + k*n];
1189       if (col <= lastcol)  low = 0;
1190       else                high = nrow;
1191       lastcol = col;
1192       while (high-low > 7) {
1193         t = (low+high)/2;
1194         if (rp[t] > bcol) high = t;
1195         else              low  = t;
1196       }
1197       for (i=low; i<high; i++) {
1198         if (rp[i] > bcol) break;
1199         if (rp[i] == bcol) {
1200           bap   = ap +  16*i + 4*cidx + ridx;
1201           *bap += value;
1202           goto noinsert1;
1203         }
1204       }
1205       N = nrow++ - 1;
1206       high++; /* added new column thus must search to one higher than before */
1207       /* shift up all the later entries in this row */
1208       for (ii=N; ii>=i; ii--) {
1209         rp[ii+1] = rp[ii];
1210         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1211       }
1212       if (N>=i) {
1213         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1214       }
1215       rp[i]                    = bcol;
1216       ap[16*i + 4*cidx + ridx] = value;
1217 noinsert1:;
1218       low = i;
1219     }
1220     ailen[brow] = nrow;
1221   }
1222   PetscFunctionReturnVoid();
1223 }
1224 EXTERN_C_END
1225 
1226 /*
1227      Checks for missing diagonals
1228 */
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1231 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1232 {
1233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1234   PetscErrorCode ierr;
1235   PetscInt       *diag,*jj = a->j,i;
1236 
1237   PetscFunctionBegin;
1238   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1239   *missing = PETSC_FALSE;
1240   if (A->rmap->n > 0 && !jj) {
1241     *missing = PETSC_TRUE;
1242     if (d) *d = 0;
1243     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1244   } else {
1245     diag = a->diag;
1246     for (i=0; i<a->mbs; i++) {
1247       if (jj[diag[i]] != i) {
1248         *missing = PETSC_TRUE;
1249         if (d) *d = i;
1250         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1251         break;
1252       }
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1260 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1261 {
1262   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1263   PetscErrorCode ierr;
1264   PetscInt       i,j,m = a->mbs;
1265 
1266   PetscFunctionBegin;
1267   if (!a->diag) {
1268     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1269     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1270     a->free_diag = PETSC_TRUE;
1271   }
1272   for (i=0; i<m; i++) {
1273     a->diag[i] = a->i[i+1];
1274     for (j=a->i[i]; j<a->i[i+1]; j++) {
1275       if (a->j[j] == i) {
1276         a->diag[i] = j;
1277         break;
1278       }
1279     }
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1287 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1288 {
1289   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1290   PetscErrorCode ierr;
1291   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1292   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1293 
1294   PetscFunctionBegin;
1295   *nn = n;
1296   if (!ia) PetscFunctionReturn(0);
1297   if (symmetric) {
1298     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1299     nz   = tia[n];
1300   } else {
1301     tia = a->i; tja = a->j;
1302   }
1303 
1304   if (!blockcompressed && bs > 1) {
1305     (*nn) *= bs;
1306     /* malloc & create the natural set of indices */
1307     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1308     if (n) {
1309       (*ia)[0] = 0;
1310       for (j=1; j<bs; j++) {
1311         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1312       }
1313     }
1314 
1315     for (i=1; i<n; i++) {
1316       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1317       for (j=1; j<bs; j++) {
1318         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1319       }
1320     }
1321     if (n) {
1322       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1323     }
1324 
1325     if (inja) {
1326       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1327       cnt = 0;
1328       for (i=0; i<n; i++) {
1329         for (j=0; j<bs; j++) {
1330           for (k=tia[i]; k<tia[i+1]; k++) {
1331             for (l=0; l<bs; l++) {
1332               (*ja)[cnt++] = bs*tja[k] + l;
1333             }
1334           }
1335         }
1336       }
1337     }
1338 
1339     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1340       ierr = PetscFree(tia);CHKERRQ(ierr);
1341       ierr = PetscFree(tja);CHKERRQ(ierr);
1342     }
1343   } else if (oshift == 1) {
1344     if (symmetric) {
1345       nz = tia[A->rmap->n/bs];
1346       /*  add 1 to i and j indices */
1347       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1348       *ia = tia;
1349       if (ja) {
1350         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1351         *ja = tja;
1352       }
1353     } else {
1354       nz = a->i[A->rmap->n/bs];
1355       /* malloc space and  add 1 to i and j indices */
1356       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1357       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1358       if (ja) {
1359         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1360         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1361       }
1362     }
1363   } else {
1364     *ia = tia;
1365     if (ja) *ja = tja;
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1372 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1373 {
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   if (!ia) PetscFunctionReturn(0);
1378   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1379     ierr = PetscFree(*ia);CHKERRQ(ierr);
1380     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1387 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1388 {
1389   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393 #if defined(PETSC_USE_LOG)
1394   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1395 #endif
1396   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1397   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1398   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1399   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1400   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1401   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1402   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1403   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1404   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1405   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1406   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1407   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1408   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1409 
1410   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1411   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1412   ierr = PetscFree(A->data);CHKERRQ(ierr);
1413 
1414   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1415   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1416   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1417   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1424   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1430 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   switch (op) {
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     break;
1440   case MAT_KEEP_NONZERO_PATTERN:
1441     a->keepnonzeropattern = flg;
1442     break;
1443   case MAT_NEW_NONZERO_LOCATIONS:
1444     a->nonew = (flg ? 0 : 1);
1445     break;
1446   case MAT_NEW_NONZERO_LOCATION_ERR:
1447     a->nonew = (flg ? -1 : 0);
1448     break;
1449   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1450     a->nonew = (flg ? -2 : 0);
1451     break;
1452   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1453     a->nounused = (flg ? -1 : 0);
1454     break;
1455   case MAT_CHECK_COMPRESSED_ROW:
1456     a->compressedrow.check = flg;
1457     break;
1458   case MAT_NEW_DIAGONALS:
1459   case MAT_IGNORE_OFF_PROC_ENTRIES:
1460   case MAT_USE_HASH_TABLE:
1461     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1462     break;
1463   case MAT_SPD:
1464   case MAT_SYMMETRIC:
1465   case MAT_STRUCTURALLY_SYMMETRIC:
1466   case MAT_HERMITIAN:
1467   case MAT_SYMMETRY_ETERNAL:
1468     /* These options are handled directly by MatSetOption() */
1469     break;
1470   default:
1471     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1478 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1479 {
1480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1481   PetscErrorCode ierr;
1482   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1483   MatScalar      *aa,*aa_i;
1484   PetscScalar    *v_i;
1485 
1486   PetscFunctionBegin;
1487   bs  = A->rmap->bs;
1488   ai  = a->i;
1489   aj  = a->j;
1490   aa  = a->a;
1491   bs2 = a->bs2;
1492 
1493   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1494 
1495   bn  = row/bs;   /* Block number */
1496   bp  = row % bs; /* Block Position */
1497   M   = ai[bn+1] - ai[bn];
1498   *nz = bs*M;
1499 
1500   if (v) {
1501     *v = 0;
1502     if (*nz) {
1503       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1504       for (i=0; i<M; i++) { /* for each block in the block row */
1505         v_i  = *v + i*bs;
1506         aa_i = aa + bs2*(ai[bn] + i);
1507         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1508       }
1509     }
1510   }
1511 
1512   if (idx) {
1513     *idx = 0;
1514     if (*nz) {
1515       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1516       for (i=0; i<M; i++) { /* for each block in the block row */
1517         idx_i = *idx + i*bs;
1518         itmp  = bs*aj[ai[bn] + i];
1519         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1520       }
1521     }
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1528 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1529 {
1530   PetscErrorCode ierr;
1531 
1532   PetscFunctionBegin;
1533   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1534   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1542 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1543 {
1544   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1545   Mat            C;
1546   PetscErrorCode ierr;
1547   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1548   PetscInt       *rows,*cols,bs2=a->bs2;
1549   MatScalar      *array;
1550 
1551   PetscFunctionBegin;
1552   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1553   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1554     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1555     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1556 
1557     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1558     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1559     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1560     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1561     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1562     ierr = PetscFree(col);CHKERRQ(ierr);
1563   } else {
1564     C = *B;
1565   }
1566 
1567   array = a->a;
1568   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1569   for (i=0; i<mbs; i++) {
1570     cols[0] = i*bs;
1571     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1572     len = ai[i+1] - ai[i];
1573     for (j=0; j<len; j++) {
1574       rows[0] = (*aj++)*bs;
1575       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1576       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1577       array += bs2;
1578     }
1579   }
1580   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1581 
1582   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1584 
1585   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1586     *B = C;
1587   } else {
1588     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 EXTERN_C_BEGIN
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1596 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1597 {
1598   PetscErrorCode ierr;
1599   Mat            Btrans;
1600 
1601   PetscFunctionBegin;
1602   *f   = PETSC_FALSE;
1603   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1604   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1605   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1606   PetscFunctionReturn(0);
1607 }
1608 EXTERN_C_END
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1612 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1613 {
1614   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1615   PetscErrorCode ierr;
1616   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1617   int            fd;
1618   PetscScalar    *aa;
1619   FILE           *file;
1620 
1621   PetscFunctionBegin;
1622   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1623   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1624   col_lens[0] = MAT_FILE_CLASSID;
1625 
1626   col_lens[1] = A->rmap->N;
1627   col_lens[2] = A->cmap->n;
1628   col_lens[3] = a->nz*bs2;
1629 
1630   /* store lengths of each row and write (including header) to file */
1631   count = 0;
1632   for (i=0; i<a->mbs; i++) {
1633     for (j=0; j<bs; j++) {
1634       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1635     }
1636   }
1637   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1638   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1639 
1640   /* store column indices (zero start index) */
1641   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1642   count = 0;
1643   for (i=0; i<a->mbs; i++) {
1644     for (j=0; j<bs; j++) {
1645       for (k=a->i[i]; k<a->i[i+1]; k++) {
1646         for (l=0; l<bs; l++) {
1647           jj[count++] = bs*a->j[k] + l;
1648         }
1649       }
1650     }
1651   }
1652   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1653   ierr = PetscFree(jj);CHKERRQ(ierr);
1654 
1655   /* store nonzero values */
1656   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1657   count = 0;
1658   for (i=0; i<a->mbs; i++) {
1659     for (j=0; j<bs; j++) {
1660       for (k=a->i[i]; k<a->i[i+1]; k++) {
1661         for (l=0; l<bs; l++) {
1662           aa[count++] = a->a[bs2*k + l*bs + j];
1663         }
1664       }
1665     }
1666   }
1667   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1668   ierr = PetscFree(aa);CHKERRQ(ierr);
1669 
1670   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1671   if (file) {
1672     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1673   }
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1679 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1680 {
1681   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1682   PetscErrorCode    ierr;
1683   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1684   PetscViewerFormat format;
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1688   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1689     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1690   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1691     Mat aij;
1692     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1693     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1694     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1695   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1696       PetscFunctionReturn(0);
1697   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1698     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1699     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1700     for (i=0; i<a->mbs; i++) {
1701       for (j=0; j<bs; j++) {
1702         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1703         for (k=a->i[i]; k<a->i[i+1]; k++) {
1704           for (l=0; l<bs; l++) {
1705 #if defined(PETSC_USE_COMPLEX)
1706             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1707               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1708                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1709             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1710               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1711                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1712             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1713               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1714             }
1715 #else
1716             if (a->a[bs2*k + l*bs + j] != 0.0) {
1717               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1718             }
1719 #endif
1720           }
1721         }
1722         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1723       }
1724     }
1725     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1726   } else {
1727     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1728     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1729     for (i=0; i<a->mbs; i++) {
1730       for (j=0; j<bs; j++) {
1731         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1732         for (k=a->i[i]; k<a->i[i+1]; k++) {
1733           for (l=0; l<bs; l++) {
1734 #if defined(PETSC_USE_COMPLEX)
1735             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1736               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1737                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1738             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1739               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1740                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1741             } else {
1742               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1743             }
1744 #else
1745             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1746 #endif
1747           }
1748         }
1749         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1750       }
1751     }
1752     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1753   }
1754   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1760 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1761 {
1762   Mat               A = (Mat) Aa;
1763   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1764   PetscErrorCode    ierr;
1765   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1766   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1767   MatScalar         *aa;
1768   PetscViewer       viewer;
1769   PetscViewerFormat format;
1770 
1771   PetscFunctionBegin;
1772   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1773   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1774 
1775   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1776 
1777   /* loop over matrix elements drawing boxes */
1778 
1779   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1780     color = PETSC_DRAW_BLUE;
1781     for (i=0,row=0; i<mbs; i++,row+=bs) {
1782       for (j=a->i[i]; j<a->i[i+1]; j++) {
1783         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1784         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1785         aa  = a->a + j*bs2;
1786         for (k=0; k<bs; k++) {
1787           for (l=0; l<bs; l++) {
1788             if (PetscRealPart(*aa++) >=  0.) continue;
1789             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1790           }
1791         }
1792       }
1793     }
1794     color = PETSC_DRAW_CYAN;
1795     for (i=0,row=0; i<mbs; i++,row+=bs) {
1796       for (j=a->i[i]; j<a->i[i+1]; j++) {
1797         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1798         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1799         aa  = a->a + j*bs2;
1800         for (k=0; k<bs; k++) {
1801           for (l=0; l<bs; l++) {
1802             if (PetscRealPart(*aa++) != 0.) continue;
1803             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1804           }
1805         }
1806       }
1807     }
1808     color = PETSC_DRAW_RED;
1809     for (i=0,row=0; i<mbs; i++,row+=bs) {
1810       for (j=a->i[i]; j<a->i[i+1]; j++) {
1811         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1812         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1813         aa  = a->a + j*bs2;
1814         for (k=0; k<bs; k++) {
1815           for (l=0; l<bs; l++) {
1816             if (PetscRealPart(*aa++) <= 0.) continue;
1817             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1818           }
1819         }
1820       }
1821     }
1822   } else {
1823     /* use contour shading to indicate magnitude of values */
1824     /* first determine max of all nonzero values */
1825     PetscDraw popup;
1826     PetscReal scale,maxv = 0.0;
1827 
1828     for (i=0; i<a->nz*a->bs2; i++) {
1829       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1830     }
1831     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1832     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1833     if (popup) {
1834       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1835     }
1836     for (i=0,row=0; i<mbs; i++,row+=bs) {
1837       for (j=a->i[i]; j<a->i[i+1]; j++) {
1838         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1839         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1840         aa  = a->a + j*bs2;
1841         for (k=0; k<bs; k++) {
1842           for (l=0; l<bs; l++) {
1843             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1844             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1845           }
1846         }
1847       }
1848     }
1849   }
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1855 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1856 {
1857   PetscErrorCode ierr;
1858   PetscReal      xl,yl,xr,yr,w,h;
1859   PetscDraw      draw;
1860   PetscBool      isnull;
1861 
1862   PetscFunctionBegin;
1863   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1864   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1865 
1866   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1867   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1868   xr  += w;    yr += h;  xl = -w;     yl = -h;
1869   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1870   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1871   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatView_SeqBAIJ"
1877 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1878 {
1879   PetscErrorCode ierr;
1880   PetscBool      iascii,isbinary,isdraw;
1881 
1882   PetscFunctionBegin;
1883   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1884   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1885   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1886   if (iascii) {
1887     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1888   } else if (isbinary) {
1889     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1890   } else if (isdraw) {
1891     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1892   } else {
1893     Mat B;
1894     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1895     ierr = MatView(B,viewer);CHKERRQ(ierr);
1896     ierr = MatDestroy(&B);CHKERRQ(ierr);
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1904 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1905 {
1906   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1907   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1908   PetscInt    *ai = a->i,*ailen = a->ilen;
1909   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1910   MatScalar   *ap,*aa = a->a;
1911 
1912   PetscFunctionBegin;
1913   for (k=0; k<m; k++) { /* loop over rows */
1914     row = im[k]; brow = row/bs;
1915     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1916     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1917     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1918     nrow = ailen[brow];
1919     for (l=0; l<n; l++) { /* loop over columns */
1920       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1921       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1922       col  = in[l];
1923       bcol = col/bs;
1924       cidx = col%bs;
1925       ridx = row%bs;
1926       high = nrow;
1927       low  = 0; /* assume unsorted */
1928       while (high-low > 5) {
1929         t = (low+high)/2;
1930         if (rp[t] > bcol) high = t;
1931         else             low  = t;
1932       }
1933       for (i=low; i<high; i++) {
1934         if (rp[i] > bcol) break;
1935         if (rp[i] == bcol) {
1936           *v++ = ap[bs2*i+bs*cidx+ridx];
1937           goto finished;
1938         }
1939       }
1940       *v++ = 0.0;
1941 finished:;
1942     }
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1949 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1950 {
1951   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1952   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1953   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1954   PetscErrorCode    ierr;
1955   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1956   PetscBool         roworiented=a->roworiented;
1957   const PetscScalar *value     = v;
1958   MatScalar         *ap,*aa = a->a,*bap;
1959 
1960   PetscFunctionBegin;
1961   if (roworiented) {
1962     stepval = (n-1)*bs;
1963   } else {
1964     stepval = (m-1)*bs;
1965   }
1966   for (k=0; k<m; k++) { /* loop over added rows */
1967     row = im[k];
1968     if (row < 0) continue;
1969 #if defined(PETSC_USE_DEBUG)
1970     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1971 #endif
1972     rp   = aj + ai[row];
1973     ap   = aa + bs2*ai[row];
1974     rmax = imax[row];
1975     nrow = ailen[row];
1976     low  = 0;
1977     high = nrow;
1978     for (l=0; l<n; l++) { /* loop over added columns */
1979       if (in[l] < 0) continue;
1980 #if defined(PETSC_USE_DEBUG)
1981       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1982 #endif
1983       col = in[l];
1984       if (roworiented) {
1985         value = v + (k*(stepval+bs) + l)*bs;
1986       } else {
1987         value = v + (l*(stepval+bs) + k)*bs;
1988       }
1989       if (col <= lastcol) low = 0;
1990       else high = nrow;
1991       lastcol = col;
1992       while (high-low > 7) {
1993         t = (low+high)/2;
1994         if (rp[t] > col) high = t;
1995         else             low  = t;
1996       }
1997       for (i=low; i<high; i++) {
1998         if (rp[i] > col) break;
1999         if (rp[i] == col) {
2000           bap = ap +  bs2*i;
2001           if (roworiented) {
2002             if (is == ADD_VALUES) {
2003               for (ii=0; ii<bs; ii++,value+=stepval) {
2004                 for (jj=ii; jj<bs2; jj+=bs) {
2005                   bap[jj] += *value++;
2006                 }
2007               }
2008             } else {
2009               for (ii=0; ii<bs; ii++,value+=stepval) {
2010                 for (jj=ii; jj<bs2; jj+=bs) {
2011                   bap[jj] = *value++;
2012                 }
2013               }
2014             }
2015           } else {
2016             if (is == ADD_VALUES) {
2017               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2018                 for (jj=0; jj<bs; jj++) {
2019                   bap[jj] += value[jj];
2020                 }
2021                 bap += bs;
2022               }
2023             } else {
2024               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2025                 for (jj=0; jj<bs; jj++) {
2026                   bap[jj]  = value[jj];
2027                 }
2028                 bap += bs;
2029               }
2030             }
2031           }
2032           goto noinsert2;
2033         }
2034       }
2035       if (nonew == 1) goto noinsert2;
2036       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2037       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2038       N = nrow++ - 1; high++;
2039       /* shift up all the later entries in this row */
2040       for (ii=N; ii>=i; ii--) {
2041         rp[ii+1] = rp[ii];
2042         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2043       }
2044       if (N >= i) {
2045         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2046       }
2047       rp[i] = col;
2048       bap   = ap +  bs2*i;
2049       if (roworiented) {
2050         for (ii=0; ii<bs; ii++,value+=stepval) {
2051           for (jj=ii; jj<bs2; jj+=bs) {
2052             bap[jj] = *value++;
2053           }
2054         }
2055       } else {
2056         for (ii=0; ii<bs; ii++,value+=stepval) {
2057           for (jj=0; jj<bs; jj++) {
2058             *bap++ = *value++;
2059           }
2060         }
2061       }
2062 noinsert2:;
2063       low = i;
2064     }
2065     ailen[row] = nrow;
2066   }
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2072 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2073 {
2074   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
2075   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2076   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
2077   PetscErrorCode ierr;
2078   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
2079   MatScalar      *aa  = a->a,*ap;
2080   PetscReal      ratio=0.6;
2081 
2082   PetscFunctionBegin;
2083   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2084 
2085   if (m) rmax = ailen[0];
2086   for (i=1; i<mbs; i++) {
2087     /* move each row back by the amount of empty slots (fshift) before it*/
2088     fshift += imax[i-1] - ailen[i-1];
2089     rmax    = PetscMax(rmax,ailen[i]);
2090     if (fshift) {
2091       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2092       N  = ailen[i];
2093       for (j=0; j<N; j++) {
2094         ip[j-fshift] = ip[j];
2095 
2096         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2097       }
2098     }
2099     ai[i] = ai[i-1] + ailen[i-1];
2100   }
2101   if (mbs) {
2102     fshift += imax[mbs-1] - ailen[mbs-1];
2103     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2104   }
2105   /* reset ilen and imax for each row */
2106   for (i=0; i<mbs; i++) {
2107     ailen[i] = imax[i] = ai[i+1] - ai[i];
2108   }
2109   a->nz = ai[mbs];
2110 
2111   /* diagonals may have moved, so kill the diagonal pointers */
2112   a->idiagvalid = PETSC_FALSE;
2113   if (fshift && a->diag) {
2114     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2115     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2116     a->diag = 0;
2117   }
2118   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2119   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2120   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2121   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2122 
2123   A->info.mallocs    += a->reallocs;
2124   a->reallocs         = 0;
2125   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2126 
2127   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2128 
2129   A->same_nonzero = PETSC_TRUE;
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /*
2134    This function returns an array of flags which indicate the locations of contiguous
2135    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2136    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2137    Assume: sizes should be long enough to hold all the values.
2138 */
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2141 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2142 {
2143   PetscInt  i,j,k,row;
2144   PetscBool flg;
2145 
2146   PetscFunctionBegin;
2147   for (i=0,j=0; i<n; j++) {
2148     row = idx[i];
2149     if (row%bs!=0) { /* Not the begining of a block */
2150       sizes[j] = 1;
2151       i++;
2152     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2153       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2154       i++;
2155     } else { /* Begining of the block, so check if the complete block exists */
2156       flg = PETSC_TRUE;
2157       for (k=1; k<bs; k++) {
2158         if (row+k != idx[i+k]) { /* break in the block */
2159           flg = PETSC_FALSE;
2160           break;
2161         }
2162       }
2163       if (flg) { /* No break in the bs */
2164         sizes[j] = bs;
2165         i       += bs;
2166       } else {
2167         sizes[j] = 1;
2168         i++;
2169       }
2170     }
2171   }
2172   *bs_max = j;
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2178 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2179 {
2180   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2181   PetscErrorCode    ierr;
2182   PetscInt          i,j,k,count,*rows;
2183   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2184   PetscScalar       zero = 0.0;
2185   MatScalar         *aa;
2186   const PetscScalar *xx;
2187   PetscScalar       *bb;
2188 
2189   PetscFunctionBegin;
2190   /* fix right hand side if needed */
2191   if (x && b) {
2192     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2193     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2194     for (i=0; i<is_n; i++) {
2195       bb[is_idx[i]] = diag*xx[is_idx[i]];
2196     }
2197     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2198     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2199   }
2200 
2201   /* Make a copy of the IS and  sort it */
2202   /* allocate memory for rows,sizes */
2203   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2204 
2205   /* copy IS values to rows, and sort them */
2206   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2207   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2208 
2209   if (baij->keepnonzeropattern) {
2210     for (i=0; i<is_n; i++) sizes[i] = 1;
2211     bs_max          = is_n;
2212     A->same_nonzero = PETSC_TRUE;
2213   } else {
2214     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2215 
2216     A->same_nonzero = PETSC_FALSE;
2217   }
2218 
2219   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2220     row = rows[j];
2221     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2222     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2223     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2224     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2225       if (diag != (PetscScalar)0.0) {
2226         if (baij->ilen[row/bs] > 0) {
2227           baij->ilen[row/bs]       = 1;
2228           baij->j[baij->i[row/bs]] = row/bs;
2229 
2230           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2231         }
2232         /* Now insert all the diagonal values for this bs */
2233         for (k=0; k<bs; k++) {
2234           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2235         }
2236       } else { /* (diag == 0.0) */
2237         baij->ilen[row/bs] = 0;
2238       } /* end (diag == 0.0) */
2239     } else { /* (sizes[i] != bs) */
2240 #if defined(PETSC_USE_DEBUG)
2241       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2242 #endif
2243       for (k=0; k<count; k++) {
2244         aa[0] =  zero;
2245         aa   += bs;
2246       }
2247       if (diag != (PetscScalar)0.0) {
2248         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2249       }
2250     }
2251   }
2252 
2253   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2254   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2260 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2261 {
2262   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2263   PetscErrorCode    ierr;
2264   PetscInt          i,j,k,count;
2265   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2266   PetscScalar       zero = 0.0;
2267   MatScalar         *aa;
2268   const PetscScalar *xx;
2269   PetscScalar       *bb;
2270   PetscBool         *zeroed,vecs = PETSC_FALSE;
2271 
2272   PetscFunctionBegin;
2273   /* fix right hand side if needed */
2274   if (x && b) {
2275     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2276     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2277     vecs = PETSC_TRUE;
2278   }
2279   A->same_nonzero = PETSC_TRUE;
2280 
2281   /* zero the columns */
2282   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2283   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2284   for (i=0; i<is_n; i++) {
2285     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2286     zeroed[is_idx[i]] = PETSC_TRUE;
2287   }
2288   for (i=0; i<A->rmap->N; i++) {
2289     if (!zeroed[i]) {
2290       row = i/bs;
2291       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2292         for (k=0; k<bs; k++) {
2293           col = bs*baij->j[j] + k;
2294           if (zeroed[col]) {
2295             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2296             if (vecs) bb[i] -= aa[0]*xx[col];
2297             aa[0] = 0.0;
2298           }
2299         }
2300       }
2301     } else if (vecs) bb[i] = diag*xx[i];
2302   }
2303   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2304   if (vecs) {
2305     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2306     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2307   }
2308 
2309   /* zero the rows */
2310   for (i=0; i<is_n; i++) {
2311     row   = is_idx[i];
2312     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2313     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2314     for (k=0; k<count; k++) {
2315       aa[0] =  zero;
2316       aa   += bs;
2317     }
2318     if (diag != (PetscScalar)0.0) {
2319       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2320     }
2321   }
2322   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2328 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2329 {
2330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2331   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2332   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2333   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2334   PetscErrorCode ierr;
2335   PetscInt       ridx,cidx,bs2=a->bs2;
2336   PetscBool      roworiented=a->roworiented;
2337   MatScalar      *ap,value,*aa=a->a,*bap;
2338 
2339   PetscFunctionBegin;
2340   if (v) PetscValidScalarPointer(v,6);
2341   for (k=0; k<m; k++) { /* loop over added rows */
2342     row  = im[k];
2343     brow = row/bs;
2344     if (row < 0) continue;
2345 #if defined(PETSC_USE_DEBUG)
2346     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2347 #endif
2348     rp   = aj + ai[brow];
2349     ap   = aa + bs2*ai[brow];
2350     rmax = imax[brow];
2351     nrow = ailen[brow];
2352     low  = 0;
2353     high = nrow;
2354     for (l=0; l<n; l++) { /* loop over added columns */
2355       if (in[l] < 0) continue;
2356 #if defined(PETSC_USE_DEBUG)
2357       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2358 #endif
2359       col  = in[l]; bcol = col/bs;
2360       ridx = row % bs; cidx = col % bs;
2361       if (roworiented) {
2362         value = v[l + k*n];
2363       } else {
2364         value = v[k + l*m];
2365       }
2366       if (col <= lastcol) low = 0; else high = nrow;
2367       lastcol = col;
2368       while (high-low > 7) {
2369         t = (low+high)/2;
2370         if (rp[t] > bcol) high = t;
2371         else              low  = t;
2372       }
2373       for (i=low; i<high; i++) {
2374         if (rp[i] > bcol) break;
2375         if (rp[i] == bcol) {
2376           bap = ap +  bs2*i + bs*cidx + ridx;
2377           if (is == ADD_VALUES) *bap += value;
2378           else                  *bap  = value;
2379           goto noinsert1;
2380         }
2381       }
2382       if (nonew == 1) goto noinsert1;
2383       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2384       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2385       N = nrow++ - 1; high++;
2386       /* shift up all the later entries in this row */
2387       for (ii=N; ii>=i; ii--) {
2388         rp[ii+1] = rp[ii];
2389         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2390       }
2391       if (N>=i) {
2392         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2393       }
2394       rp[i]                      = bcol;
2395       ap[bs2*i + bs*cidx + ridx] = value;
2396       a->nz++;
2397 noinsert1:;
2398       low = i;
2399     }
2400     ailen[brow] = nrow;
2401   }
2402   A->same_nonzero = PETSC_FALSE;
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2408 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2409 {
2410   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2411   Mat            outA;
2412   PetscErrorCode ierr;
2413   PetscBool      row_identity,col_identity;
2414 
2415   PetscFunctionBegin;
2416   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2417   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2418   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2419   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2420 
2421   outA            = inA;
2422   inA->factortype = MAT_FACTOR_LU;
2423 
2424   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2425 
2426   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2427   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2428   a->row = row;
2429   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2430   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2431   a->col = col;
2432 
2433   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2434   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2435   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2436   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2437 
2438   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2439   if (!a->solve_work) {
2440     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2441     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2442   }
2443   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 EXTERN_C_BEGIN
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2450 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2451 {
2452   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2453   PetscInt    i,nz,mbs;
2454 
2455   PetscFunctionBegin;
2456   nz  = baij->maxnz;
2457   mbs = baij->mbs;
2458   for (i=0; i<nz; i++) {
2459     baij->j[i] = indices[i];
2460   }
2461   baij->nz = nz;
2462   for (i=0; i<mbs; i++) {
2463     baij->ilen[i] = baij->imax[i];
2464   }
2465   PetscFunctionReturn(0);
2466 }
2467 EXTERN_C_END
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2471 /*@
2472     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2473        in the matrix.
2474 
2475   Input Parameters:
2476 +  mat - the SeqBAIJ matrix
2477 -  indices - the column indices
2478 
2479   Level: advanced
2480 
2481   Notes:
2482     This can be called if you have precomputed the nonzero structure of the
2483   matrix and want to provide it to the matrix object to improve the performance
2484   of the MatSetValues() operation.
2485 
2486     You MUST have set the correct numbers of nonzeros per row in the call to
2487   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2488 
2489     MUST be called before any calls to MatSetValues();
2490 
2491 @*/
2492 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2493 {
2494   PetscErrorCode ierr;
2495 
2496   PetscFunctionBegin;
2497   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2498   PetscValidPointer(indices,2);
2499   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2505 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2506 {
2507   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2508   PetscErrorCode ierr;
2509   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2510   PetscReal      atmp;
2511   PetscScalar    *x,zero = 0.0;
2512   MatScalar      *aa;
2513   PetscInt       ncols,brow,krow,kcol;
2514 
2515   PetscFunctionBegin;
2516   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2517   bs  = A->rmap->bs;
2518   aa  = a->a;
2519   ai  = a->i;
2520   aj  = a->j;
2521   mbs = a->mbs;
2522 
2523   ierr = VecSet(v,zero);CHKERRQ(ierr);
2524   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2525   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2526   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2527   for (i=0; i<mbs; i++) {
2528     ncols = ai[1] - ai[0]; ai++;
2529     brow  = bs*i;
2530     for (j=0; j<ncols; j++) {
2531       for (kcol=0; kcol<bs; kcol++) {
2532         for (krow=0; krow<bs; krow++) {
2533           atmp = PetscAbsScalar(*aa);aa++;
2534           row  = brow + krow;   /* row index */
2535           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2536           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2537         }
2538       }
2539       aj++;
2540     }
2541   }
2542   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatCopy_SeqBAIJ"
2548 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2549 {
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   /* If the two matrices have the same copy implementation, use fast copy. */
2554   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2555     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2556     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2557     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2558 
2559     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2560     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2561     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2562   } else {
2563     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2564   }
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2570 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2571 {
2572   PetscErrorCode ierr;
2573 
2574   PetscFunctionBegin;
2575   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 #undef __FUNCT__
2580 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2581 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2582 {
2583   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2584 
2585   PetscFunctionBegin;
2586   *array = a->a;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2592 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2593 {
2594   PetscFunctionBegin;
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 #undef __FUNCT__
2599 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2600 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2601 {
2602   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2603   PetscErrorCode ierr;
2604   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2605   PetscBLASInt   one=1;
2606 
2607   PetscFunctionBegin;
2608   if (str == SAME_NONZERO_PATTERN) {
2609     PetscScalar  alpha = a;
2610     PetscBLASInt bnz;
2611     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2612     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2613   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2614     if (y->xtoy && y->XtoY != X) {
2615       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2616       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2617     }
2618     if (!y->xtoy) { /* get xtoy */
2619       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2620       y->XtoY = X;
2621       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2622     }
2623     for (i=0; i<x->nz; i++) {
2624       j = 0;
2625       while (j < bs2) {
2626         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2627         j++;
2628       }
2629     }
2630     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2631   } else {
2632     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2633   }
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 #undef __FUNCT__
2638 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2639 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2640 {
2641   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2642   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2643   MatScalar   *aa = a->a;
2644 
2645   PetscFunctionBegin;
2646   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2652 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2653 {
2654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2655   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2656   MatScalar   *aa = a->a;
2657 
2658   PetscFunctionBegin;
2659   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2664 
2665 #undef __FUNCT__
2666 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2667 /*
2668     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2669 */
2670 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2671 {
2672   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2673   PetscErrorCode ierr;
2674   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2675   PetscInt       nz = a->i[m],row,*jj,mr,col;
2676 
2677   PetscFunctionBegin;
2678   *nn = n;
2679   if (!ia) PetscFunctionReturn(0);
2680   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2681   else {
2682     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2683     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2684     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2685     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2686     jj   = a->j;
2687     for (i=0; i<nz; i++) {
2688       collengths[jj[i]]++;
2689     }
2690     cia[0] = oshift;
2691     for (i=0; i<n; i++) {
2692       cia[i+1] = cia[i] + collengths[i];
2693     }
2694     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2695     jj   = a->j;
2696     for (row=0; row<m; row++) {
2697       mr = a->i[row+1] - a->i[row];
2698       for (i=0; i<mr; i++) {
2699         col = *jj++;
2700 
2701         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2702       }
2703     }
2704     ierr = PetscFree(collengths);CHKERRQ(ierr);
2705     *ia  = cia; *ja = cja;
2706   }
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2712 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2713 {
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   if (!ia) PetscFunctionReturn(0);
2718   ierr = PetscFree(*ia);CHKERRQ(ierr);
2719   ierr = PetscFree(*ja);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2725 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2726 {
2727   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2728   PetscErrorCode ierr;
2729   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2730   PetscScalar    dx,*y,*xx,*w3_array;
2731   PetscScalar    *vscale_array;
2732   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2733   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2734   void           *fctx   = coloring->fctx;
2735   PetscBool      flg     = PETSC_FALSE;
2736   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2737   Vec            x1_tmp;
2738 
2739   PetscFunctionBegin;
2740   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2741   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2742   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2743   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2744 
2745   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2746   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2747   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2748   if (flg) {
2749     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2750   } else {
2751     PetscBool assembled;
2752     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2753     if (assembled) {
2754       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2755     }
2756   }
2757 
2758   x1_tmp = x1;
2759   if (!coloring->vscale) {
2760     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2761   }
2762 
2763   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2764     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2765   }
2766   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2767 
2768   /* Set w1 = F(x1) */
2769   if (!coloring->fset) {
2770     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2771     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2772     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2773   } else {
2774     coloring->fset = PETSC_FALSE;
2775   }
2776 
2777   if (!coloring->w3) {
2778     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2779     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2780   }
2781   w3 = coloring->w3;
2782 
2783   CHKMEMQ;
2784   /* Compute all the local scale factors, including ghost points */
2785   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2786   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2787   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2788   if (ctype == IS_COLORING_GHOSTED) {
2789     col_start = 0; col_end = N;
2790   } else if (ctype == IS_COLORING_GLOBAL) {
2791     xx           = xx - start;
2792     vscale_array = vscale_array - start;
2793     col_start    = start; col_end = N + start;
2794   }
2795   CHKMEMQ;
2796   for (col=col_start; col<col_end; col++) {
2797     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2798     if (coloring->htype[0] == 'w') {
2799       dx = 1.0 + unorm;
2800     } else {
2801       dx = xx[col];
2802     }
2803     if (dx == (PetscScalar)0.0) dx = 1.0;
2804     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2805     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2806     dx               *= epsilon;
2807     vscale_array[col] = (PetscScalar)1.0/dx;
2808   }     CHKMEMQ;
2809   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2810   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2811   if (ctype == IS_COLORING_GLOBAL) {
2812     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2813     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2814   }
2815   CHKMEMQ;
2816   if (coloring->vscaleforrow) {
2817     vscaleforrow = coloring->vscaleforrow;
2818   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2819 
2820   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2821   /*
2822     Loop over each color
2823   */
2824   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2825   for (k=0; k<coloring->ncolors; k++) {
2826     coloring->currentcolor = k;
2827     for (i=0; i<bs; i++) {
2828       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2829       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2830       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2831       /*
2832         Loop over each column associated with color
2833         adding the perturbation to the vector w3.
2834       */
2835       for (l=0; l<coloring->ncolumns[k]; l++) {
2836         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2837         if (coloring->htype[0] == 'w') {
2838           dx = 1.0 + unorm;
2839         } else {
2840           dx = xx[col];
2841         }
2842         if (dx == (PetscScalar)0.0) dx = 1.0;
2843         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2844         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2845         dx *= epsilon;
2846         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2847         w3_array[col] += dx;
2848       }
2849       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2850       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2851 
2852       /*
2853         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2854         w2 = F(x1 + dx) - F(x1)
2855       */
2856       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2857       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2858       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2859       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2860 
2861       /*
2862         Loop over rows of vector, putting results into Jacobian matrix
2863       */
2864       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2865       for (l=0; l<coloring->nrows[k]; l++) {
2866         row = bs*coloring->rows[k][l];                /* local row index */
2867         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2868         for (j=0; j<bs; j++) {
2869           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2870           srows[j]  = row + start + j;
2871         }
2872         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2873       }
2874       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2875     }
2876   } /* endof for each color */
2877   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2878   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2879   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2880   ierr = PetscFree(srows);CHKERRQ(ierr);
2881 
2882   coloring->currentcolor = -1;
2883 
2884   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2885   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2886   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 /* -------------------------------------------------------------------*/
2891 static struct _MatOps MatOps_Values = {
2892         MatSetValues_SeqBAIJ,
2893         MatGetRow_SeqBAIJ,
2894         MatRestoreRow_SeqBAIJ,
2895         MatMult_SeqBAIJ_N,
2896 /* 4*/  MatMultAdd_SeqBAIJ_N,
2897         MatMultTranspose_SeqBAIJ,
2898         MatMultTransposeAdd_SeqBAIJ,
2899         0,
2900         0,
2901         0,
2902 /* 10*/ 0,
2903         MatLUFactor_SeqBAIJ,
2904         0,
2905         0,
2906         MatTranspose_SeqBAIJ,
2907 /* 15*/ MatGetInfo_SeqBAIJ,
2908         MatEqual_SeqBAIJ,
2909         MatGetDiagonal_SeqBAIJ,
2910         MatDiagonalScale_SeqBAIJ,
2911         MatNorm_SeqBAIJ,
2912 /* 20*/ 0,
2913         MatAssemblyEnd_SeqBAIJ,
2914         MatSetOption_SeqBAIJ,
2915         MatZeroEntries_SeqBAIJ,
2916 /* 24*/ MatZeroRows_SeqBAIJ,
2917         0,
2918         0,
2919         0,
2920         0,
2921 /* 29*/ MatSetUp_SeqBAIJ,
2922         0,
2923         0,
2924         0,
2925         0,
2926 /* 34*/ MatDuplicate_SeqBAIJ,
2927         0,
2928         0,
2929         MatILUFactor_SeqBAIJ,
2930         0,
2931 /* 39*/ MatAXPY_SeqBAIJ,
2932         MatGetSubMatrices_SeqBAIJ,
2933         MatIncreaseOverlap_SeqBAIJ,
2934         MatGetValues_SeqBAIJ,
2935         MatCopy_SeqBAIJ,
2936 /* 44*/ 0,
2937         MatScale_SeqBAIJ,
2938         0,
2939         0,
2940         MatZeroRowsColumns_SeqBAIJ,
2941 /* 49*/ 0,
2942         MatGetRowIJ_SeqBAIJ,
2943         MatRestoreRowIJ_SeqBAIJ,
2944         MatGetColumnIJ_SeqBAIJ,
2945         MatRestoreColumnIJ_SeqBAIJ,
2946 /* 54*/ MatFDColoringCreate_SeqAIJ,
2947         0,
2948         0,
2949         0,
2950         MatSetValuesBlocked_SeqBAIJ,
2951 /* 59*/ MatGetSubMatrix_SeqBAIJ,
2952         MatDestroy_SeqBAIJ,
2953         MatView_SeqBAIJ,
2954         0,
2955         0,
2956 /* 64*/ 0,
2957         0,
2958         0,
2959         0,
2960         0,
2961 /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2962         0,
2963         MatConvert_Basic,
2964         0,
2965         0,
2966 /* 74*/ 0,
2967         MatFDColoringApply_BAIJ,
2968         0,
2969         0,
2970         0,
2971 /* 79*/ 0,
2972         0,
2973         0,
2974         0,
2975         MatLoad_SeqBAIJ,
2976 /* 84*/ 0,
2977         0,
2978         0,
2979         0,
2980         0,
2981 /* 89*/ 0,
2982         0,
2983         0,
2984         0,
2985         0,
2986 /* 94*/ 0,
2987         0,
2988         0,
2989         0,
2990         0,
2991 /* 99*/ 0,
2992         0,
2993         0,
2994         0,
2995         0,
2996 /*104*/ 0,
2997         MatRealPart_SeqBAIJ,
2998         MatImaginaryPart_SeqBAIJ,
2999         0,
3000         0,
3001 /*109*/ 0,
3002         0,
3003         0,
3004         0,
3005         MatMissingDiagonal_SeqBAIJ,
3006 /*114*/ 0,
3007         0,
3008         0,
3009         0,
3010         0,
3011 /*119*/ 0,
3012         0,
3013         MatMultHermitianTranspose_SeqBAIJ,
3014         MatMultHermitianTransposeAdd_SeqBAIJ,
3015         0,
3016 /*124*/ 0,
3017         0,
3018         MatInvertBlockDiagonal_SeqBAIJ
3019 };
3020 
3021 EXTERN_C_BEGIN
3022 #undef __FUNCT__
3023 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3024 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3025 {
3026   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3027   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
3028   PetscErrorCode ierr;
3029 
3030   PetscFunctionBegin;
3031   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3032 
3033   /* allocate space for values if not already there */
3034   if (!aij->saved_values) {
3035     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3036     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3037   }
3038 
3039   /* copy values over */
3040   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3041   PetscFunctionReturn(0);
3042 }
3043 EXTERN_C_END
3044 
3045 EXTERN_C_BEGIN
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3048 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3049 {
3050   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3051   PetscErrorCode ierr;
3052   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3053 
3054   PetscFunctionBegin;
3055   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3056   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3057 
3058   /* copy values over */
3059   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3060   PetscFunctionReturn(0);
3061 }
3062 EXTERN_C_END
3063 
3064 EXTERN_C_BEGIN
3065 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3066 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3067 EXTERN_C_END
3068 
3069 EXTERN_C_BEGIN
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3072 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3073 {
3074   Mat_SeqBAIJ    *b;
3075   PetscErrorCode ierr;
3076   PetscInt       i,mbs,nbs,bs2;
3077   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3078 
3079   PetscFunctionBegin;
3080   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3081   if (nz == MAT_SKIP_ALLOCATION) {
3082     skipallocation = PETSC_TRUE;
3083     nz             = 0;
3084   }
3085 
3086   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3087   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3088   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3089   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3090   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3091 
3092   B->preallocated = PETSC_TRUE;
3093 
3094   mbs = B->rmap->n/bs;
3095   nbs = B->cmap->n/bs;
3096   bs2 = bs*bs;
3097 
3098   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3099 
3100   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3101   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3102   if (nnz) {
3103     for (i=0; i<mbs; i++) {
3104       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3105       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3106     }
3107   }
3108 
3109   b    = (Mat_SeqBAIJ*)B->data;
3110   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3111   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3112   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3113 
3114   if (!flg) {
3115     switch (bs) {
3116     case 1:
3117       B->ops->mult    = MatMult_SeqBAIJ_1;
3118       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3119       B->ops->sor     = MatSOR_SeqBAIJ_1;
3120       break;
3121     case 2:
3122       B->ops->mult    = MatMult_SeqBAIJ_2;
3123       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3124       B->ops->sor     = MatSOR_SeqBAIJ_2;
3125       break;
3126     case 3:
3127       B->ops->mult    = MatMult_SeqBAIJ_3;
3128       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3129       B->ops->sor     = MatSOR_SeqBAIJ_3;
3130       break;
3131     case 4:
3132       B->ops->mult    = MatMult_SeqBAIJ_4;
3133       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3134       B->ops->sor     = MatSOR_SeqBAIJ_4;
3135       break;
3136     case 5:
3137       B->ops->mult    = MatMult_SeqBAIJ_5;
3138       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3139       B->ops->sor     = MatSOR_SeqBAIJ_5;
3140       break;
3141     case 6:
3142       B->ops->mult    = MatMult_SeqBAIJ_6;
3143       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3144       B->ops->sor     = MatSOR_SeqBAIJ_6;
3145       break;
3146     case 7:
3147       B->ops->mult    = MatMult_SeqBAIJ_7;
3148       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3149       B->ops->sor     = MatSOR_SeqBAIJ_7;
3150       break;
3151     case 15:
3152       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3153       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3154       B->ops->sor     = MatSOR_SeqBAIJ_N;
3155       break;
3156     default:
3157       B->ops->mult    = MatMult_SeqBAIJ_N;
3158       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3159       B->ops->sor     = MatSOR_SeqBAIJ_N;
3160       break;
3161     }
3162   }
3163   b->mbs = mbs;
3164   b->nbs = nbs;
3165   if (!skipallocation) {
3166     if (!b->imax) {
3167       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3168       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3169 
3170       b->free_imax_ilen = PETSC_TRUE;
3171     }
3172     /* b->ilen will count nonzeros in each block row so far. */
3173     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3174     if (!nnz) {
3175       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3176       else if (nz < 0) nz = 1;
3177       for (i=0; i<mbs; i++) b->imax[i] = nz;
3178       nz = nz*mbs;
3179     } else {
3180       nz = 0;
3181       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3182     }
3183 
3184     /* allocate the matrix space */
3185     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3186     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3187     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3188     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3189     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3190 
3191     b->singlemalloc = PETSC_TRUE;
3192     b->i[0]         = 0;
3193     for (i=1; i<mbs+1; i++) {
3194       b->i[i] = b->i[i-1] + b->imax[i-1];
3195     }
3196     b->free_a  = PETSC_TRUE;
3197     b->free_ij = PETSC_TRUE;
3198   } else {
3199     b->free_a  = PETSC_FALSE;
3200     b->free_ij = PETSC_FALSE;
3201   }
3202 
3203   b->bs2              = bs2;
3204   b->mbs              = mbs;
3205   b->nz               = 0;
3206   b->maxnz            = nz;
3207   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3208   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3209   PetscFunctionReturn(0);
3210 }
3211 EXTERN_C_END
3212 
3213 EXTERN_C_BEGIN
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3216 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3217 {
3218   PetscInt       i,m,nz,nz_max=0,*nnz;
3219   PetscScalar    *values=0;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3224   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3225   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3226   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3227   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3228   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3229   m    = B->rmap->n/bs;
3230 
3231   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3232   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3233   for (i=0; i<m; i++) {
3234     nz = ii[i+1]- ii[i];
3235     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3236     nz_max = PetscMax(nz_max, nz);
3237     nnz[i] = nz;
3238   }
3239   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3240   ierr = PetscFree(nnz);CHKERRQ(ierr);
3241 
3242   values = (PetscScalar*)V;
3243   if (!values) {
3244     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3245     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3246   }
3247   for (i=0; i<m; i++) {
3248     PetscInt          ncols  = ii[i+1] - ii[i];
3249     const PetscInt    *icols = jj + ii[i];
3250     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3251     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3252   }
3253   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3254   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3255   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3257   PetscFunctionReturn(0);
3258 }
3259 EXTERN_C_END
3260 
3261 
3262 EXTERN_C_BEGIN
3263 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3264 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3265 #if defined(PETSC_HAVE_MUMPS)
3266 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3267 #endif
3268 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3269 EXTERN_C_END
3270 
3271 /*MC
3272    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3273    block sparse compressed row format.
3274 
3275    Options Database Keys:
3276 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3277 
3278   Level: beginner
3279 
3280 .seealso: MatCreateSeqBAIJ()
3281 M*/
3282 
3283 EXTERN_C_BEGIN
3284 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3285 EXTERN_C_END
3286 
3287 EXTERN_C_BEGIN
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatCreate_SeqBAIJ"
3290 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3291 {
3292   PetscErrorCode ierr;
3293   PetscMPIInt    size;
3294   Mat_SeqBAIJ    *b;
3295 
3296   PetscFunctionBegin;
3297   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3298   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3299 
3300   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3301   B->data = (void*)b;
3302   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3303 
3304   b->row          = 0;
3305   b->col          = 0;
3306   b->icol         = 0;
3307   b->reallocs     = 0;
3308   b->saved_values = 0;
3309 
3310   b->roworiented        = PETSC_TRUE;
3311   b->nonew              = 0;
3312   b->diag               = 0;
3313   b->solve_work         = 0;
3314   b->mult_work          = 0;
3315   B->spptr              = 0;
3316   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3317   b->keepnonzeropattern = PETSC_FALSE;
3318   b->xtoy               = 0;
3319   b->XtoY               = 0;
3320   B->same_nonzero       = PETSC_FALSE;
3321 
3322   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3323                                            "MatGetFactorAvailable_seqbaij_petsc",
3324                                            MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3325   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3326                                            "MatGetFactor_seqbaij_petsc",
3327                                            MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3328   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3329                                            "MatGetFactor_seqbaij_bstrm",
3330                                            MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3331 #if defined(PETSC_HAVE_MUMPS)
3332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3333 #endif
3334   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3335                                            "MatInvertBlockDiagonal_SeqBAIJ",
3336                                            MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3337   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3338                                            "MatStoreValues_SeqBAIJ",
3339                                            MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3340   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3341                                            "MatRetrieveValues_SeqBAIJ",
3342                                            MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3343   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3344                                            "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3345                                            MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3346   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3347                                            "MatConvert_SeqBAIJ_SeqAIJ",
3348                                            MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3350                                            "MatConvert_SeqBAIJ_SeqSBAIJ",
3351                                            MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3353                                            "MatSeqBAIJSetPreallocation_SeqBAIJ",
3354                                            MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3356                                            "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3357                                            MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3359                                            "MatConvert_SeqBAIJ_SeqBSTRM",
3360                                            MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3362                                            "MatIsTranspose_SeqBAIJ",
3363                                            MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3364   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3365   PetscFunctionReturn(0);
3366 }
3367 EXTERN_C_END
3368 
3369 #undef __FUNCT__
3370 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3371 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3372 {
3373   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3374   PetscErrorCode ierr;
3375   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3376 
3377   PetscFunctionBegin;
3378   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3379 
3380   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3381     c->imax           = a->imax;
3382     c->ilen           = a->ilen;
3383     c->free_imax_ilen = PETSC_FALSE;
3384   } else {
3385     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3386     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3387     for (i=0; i<mbs; i++) {
3388       c->imax[i] = a->imax[i];
3389       c->ilen[i] = a->ilen[i];
3390     }
3391     c->free_imax_ilen = PETSC_TRUE;
3392   }
3393 
3394   /* allocate the matrix space */
3395   if (mallocmatspace) {
3396     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3397       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3398       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3399       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3400 
3401       c->i            = a->i;
3402       c->j            = a->j;
3403       c->singlemalloc = PETSC_FALSE;
3404       c->free_a       = PETSC_TRUE;
3405       c->free_ij      = PETSC_FALSE;
3406       c->parent       = A;
3407       C->preallocated = PETSC_TRUE;
3408       C->assembled    = PETSC_TRUE;
3409 
3410       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3411       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3412       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3413     } else {
3414       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3415       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3416 
3417       c->singlemalloc = PETSC_TRUE;
3418       c->free_a       = PETSC_TRUE;
3419       c->free_ij      = PETSC_TRUE;
3420 
3421       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3422       if (mbs > 0) {
3423         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3424         if (cpvalues == MAT_COPY_VALUES) {
3425           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3426         } else {
3427           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3428         }
3429       }
3430       C->preallocated = PETSC_TRUE;
3431       C->assembled    = PETSC_TRUE;
3432     }
3433   }
3434 
3435   c->roworiented = a->roworiented;
3436   c->nonew       = a->nonew;
3437 
3438   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3439   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3440 
3441   c->bs2         = a->bs2;
3442   c->mbs         = a->mbs;
3443   c->nbs         = a->nbs;
3444 
3445   if (a->diag) {
3446     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3447       c->diag      = a->diag;
3448       c->free_diag = PETSC_FALSE;
3449     } else {
3450       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3451       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3452       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3453       c->free_diag = PETSC_TRUE;
3454     }
3455   } else c->diag = 0;
3456 
3457   c->nz         = a->nz;
3458   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3459   c->solve_work = 0;
3460   c->mult_work  = 0;
3461 
3462   c->compressedrow.use   = a->compressedrow.use;
3463   c->compressedrow.nrows = a->compressedrow.nrows;
3464   c->compressedrow.check = a->compressedrow.check;
3465   if (a->compressedrow.use) {
3466     i    = a->compressedrow.nrows;
3467     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3468     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3469     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3470     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3471   } else {
3472     c->compressedrow.use    = PETSC_FALSE;
3473     c->compressedrow.i      = PETSC_NULL;
3474     c->compressedrow.rindex = PETSC_NULL;
3475   }
3476   C->same_nonzero = A->same_nonzero;
3477 
3478   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3479   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 #undef __FUNCT__
3484 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3485 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3486 {
3487   PetscErrorCode ierr;
3488 
3489   PetscFunctionBegin;
3490   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3491   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3492   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3493   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3494   PetscFunctionReturn(0);
3495 }
3496 
3497 #undef __FUNCT__
3498 #define __FUNCT__ "MatLoad_SeqBAIJ"
3499 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3500 {
3501   Mat_SeqBAIJ    *a;
3502   PetscErrorCode ierr;
3503   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3504   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3505   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3506   PetscInt       *masked,nmask,tmp,bs2,ishift;
3507   PetscMPIInt    size;
3508   int            fd;
3509   PetscScalar    *aa;
3510   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3511 
3512   PetscFunctionBegin;
3513   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3514   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3515   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3516   bs2  = bs*bs;
3517 
3518   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3519   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3520   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3521   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3522   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3523   M = header[1]; N = header[2]; nz = header[3];
3524 
3525   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3526   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3527 
3528   /*
3529      This code adds extra rows to make sure the number of rows is
3530     divisible by the blocksize
3531   */
3532   mbs        = M/bs;
3533   extra_rows = bs - M + bs*(mbs);
3534   if (extra_rows == bs) extra_rows = 0;
3535   else mbs++;
3536   if (extra_rows) {
3537     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3538   }
3539 
3540   /* Set global sizes if not already set */
3541   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3542     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3543   } else { /* Check if the matrix global sizes are correct */
3544     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3545     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3546       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3547     }
3548     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3549   }
3550 
3551   /* read in row lengths */
3552   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3553   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3554   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3555 
3556   /* read in column indices */
3557   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3558   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3559   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3560 
3561   /* loop over row lengths determining block row lengths */
3562   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3563   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3564   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3565   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3566   rowcount = 0;
3567   nzcount  = 0;
3568   for (i=0; i<mbs; i++) {
3569     nmask = 0;
3570     for (j=0; j<bs; j++) {
3571       kmax = rowlengths[rowcount];
3572       for (k=0; k<kmax; k++) {
3573         tmp = jj[nzcount++]/bs;
3574         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3575       }
3576       rowcount++;
3577     }
3578     browlengths[i] += nmask;
3579     /* zero out the mask elements we set */
3580     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3581   }
3582 
3583   /* Do preallocation  */
3584   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3585   a    = (Mat_SeqBAIJ*)newmat->data;
3586 
3587   /* set matrix "i" values */
3588   a->i[0] = 0;
3589   for (i=1; i<= mbs; i++) {
3590     a->i[i]      = a->i[i-1] + browlengths[i-1];
3591     a->ilen[i-1] = browlengths[i-1];
3592   }
3593   a->nz = 0;
3594   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3595 
3596   /* read in nonzero values */
3597   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3598   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3599   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3600 
3601   /* set "a" and "j" values into matrix */
3602   nzcount = 0; jcount = 0;
3603   for (i=0; i<mbs; i++) {
3604     nzcountb = nzcount;
3605     nmask    = 0;
3606     for (j=0; j<bs; j++) {
3607       kmax = rowlengths[i*bs+j];
3608       for (k=0; k<kmax; k++) {
3609         tmp = jj[nzcount++]/bs;
3610         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3611       }
3612     }
3613     /* sort the masked values */
3614     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3615 
3616     /* set "j" values into matrix */
3617     maskcount = 1;
3618     for (j=0; j<nmask; j++) {
3619       a->j[jcount++]  = masked[j];
3620       mask[masked[j]] = maskcount++;
3621     }
3622     /* set "a" values into matrix */
3623     ishift = bs2*a->i[i];
3624     for (j=0; j<bs; j++) {
3625       kmax = rowlengths[i*bs+j];
3626       for (k=0; k<kmax; k++) {
3627         tmp       = jj[nzcountb]/bs;
3628         block     = mask[tmp] - 1;
3629         point     = jj[nzcountb] - bs*tmp;
3630         idx       = ishift + bs2*block + j + bs*point;
3631         a->a[idx] = (MatScalar)aa[nzcountb++];
3632       }
3633     }
3634     /* zero out the mask elements we set */
3635     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3636   }
3637   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3638 
3639   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3640   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3641   ierr = PetscFree(aa);CHKERRQ(ierr);
3642   ierr = PetscFree(jj);CHKERRQ(ierr);
3643   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3644 
3645   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3646   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647   PetscFunctionReturn(0);
3648 }
3649 
3650 #undef __FUNCT__
3651 #define __FUNCT__ "MatCreateSeqBAIJ"
3652 /*@C
3653    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3654    compressed row) format.  For good matrix assembly performance the
3655    user should preallocate the matrix storage by setting the parameter nz
3656    (or the array nnz).  By setting these parameters accurately, performance
3657    during matrix assembly can be increased by more than a factor of 50.
3658 
3659    Collective on MPI_Comm
3660 
3661    Input Parameters:
3662 +  comm - MPI communicator, set to PETSC_COMM_SELF
3663 .  bs - size of block
3664 .  m - number of rows
3665 .  n - number of columns
3666 .  nz - number of nonzero blocks  per block row (same for all rows)
3667 -  nnz - array containing the number of nonzero blocks in the various block rows
3668          (possibly different for each block row) or PETSC_NULL
3669 
3670    Output Parameter:
3671 .  A - the matrix
3672 
3673    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3674    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3675    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3676 
3677    Options Database Keys:
3678 .   -mat_no_unroll - uses code that does not unroll the loops in the
3679                      block calculations (much slower)
3680 .    -mat_block_size - size of the blocks to use
3681 
3682    Level: intermediate
3683 
3684    Notes:
3685    The number of rows and columns must be divisible by blocksize.
3686 
3687    If the nnz parameter is given then the nz parameter is ignored
3688 
3689    A nonzero block is any block that as 1 or more nonzeros in it
3690 
3691    The block AIJ format is fully compatible with standard Fortran 77
3692    storage.  That is, the stored row and column indices can begin at
3693    either one (as in Fortran) or zero.  See the users' manual for details.
3694 
3695    Specify the preallocated storage with either nz or nnz (not both).
3696    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3697    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3698    matrices.
3699 
3700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3701 @*/
3702 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3703 {
3704   PetscErrorCode ierr;
3705 
3706   PetscFunctionBegin;
3707   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3708   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3709   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3710   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3711   PetscFunctionReturn(0);
3712 }
3713 
3714 #undef __FUNCT__
3715 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3716 /*@C
3717    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3718    per row in the matrix. For good matrix assembly performance the
3719    user should preallocate the matrix storage by setting the parameter nz
3720    (or the array nnz).  By setting these parameters accurately, performance
3721    during matrix assembly can be increased by more than a factor of 50.
3722 
3723    Collective on MPI_Comm
3724 
3725    Input Parameters:
3726 +  A - the matrix
3727 .  bs - size of block
3728 .  nz - number of block nonzeros per block row (same for all rows)
3729 -  nnz - array containing the number of block nonzeros in the various block rows
3730          (possibly different for each block row) or PETSC_NULL
3731 
3732    Options Database Keys:
3733 .   -mat_no_unroll - uses code that does not unroll the loops in the
3734                      block calculations (much slower)
3735 .    -mat_block_size - size of the blocks to use
3736 
3737    Level: intermediate
3738 
3739    Notes:
3740    If the nnz parameter is given then the nz parameter is ignored
3741 
3742    You can call MatGetInfo() to get information on how effective the preallocation was;
3743    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3744    You can also run with the option -info and look for messages with the string
3745    malloc in them to see if additional memory allocation was needed.
3746 
3747    The block AIJ format is fully compatible with standard Fortran 77
3748    storage.  That is, the stored row and column indices can begin at
3749    either one (as in Fortran) or zero.  See the users' manual for details.
3750 
3751    Specify the preallocated storage with either nz or nnz (not both).
3752    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3753    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3754 
3755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3756 @*/
3757 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3758 {
3759   PetscErrorCode ierr;
3760 
3761   PetscFunctionBegin;
3762   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3763   PetscValidType(B,1);
3764   PetscValidLogicalCollectiveInt(B,bs,2);
3765   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3771 /*@C
3772    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3773    (the default sequential PETSc format).
3774 
3775    Collective on MPI_Comm
3776 
3777    Input Parameters:
3778 +  A - the matrix
3779 .  i - the indices into j for the start of each local row (starts with zero)
3780 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3781 -  v - optional values in the matrix
3782 
3783    Level: developer
3784 
3785 .keywords: matrix, aij, compressed row, sparse
3786 
3787 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3788 @*/
3789 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3790 {
3791   PetscErrorCode ierr;
3792 
3793   PetscFunctionBegin;
3794   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3795   PetscValidType(B,1);
3796   PetscValidLogicalCollectiveInt(B,bs,2);
3797   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3798   PetscFunctionReturn(0);
3799 }
3800 
3801 
3802 #undef __FUNCT__
3803 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3804 /*@
3805      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3806 
3807      Collective on MPI_Comm
3808 
3809    Input Parameters:
3810 +  comm - must be an MPI communicator of size 1
3811 .  bs - size of block
3812 .  m - number of rows
3813 .  n - number of columns
3814 .  i - row indices
3815 .  j - column indices
3816 -  a - matrix values
3817 
3818    Output Parameter:
3819 .  mat - the matrix
3820 
3821    Level: advanced
3822 
3823    Notes:
3824        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3825     once the matrix is destroyed
3826 
3827        You cannot set new nonzero locations into this matrix, that will generate an error.
3828 
3829        The i and j indices are 0 based
3830 
3831        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3832 
3833 
3834 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3835 
3836 @*/
3837 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3838 {
3839   PetscErrorCode ierr;
3840   PetscInt       ii;
3841   Mat_SeqBAIJ    *baij;
3842 
3843   PetscFunctionBegin;
3844   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3845   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3846 
3847   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3848   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3849   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3850   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3851   baij = (Mat_SeqBAIJ*)(*mat)->data;
3852   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3853   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3854 
3855   baij->i = i;
3856   baij->j = j;
3857   baij->a = a;
3858 
3859   baij->singlemalloc = PETSC_FALSE;
3860   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3861   baij->free_a       = PETSC_FALSE;
3862   baij->free_ij      = PETSC_FALSE;
3863 
3864   for (ii=0; ii<m; ii++) {
3865     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3866 #if defined(PETSC_USE_DEBUG)
3867     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3868 #endif
3869   }
3870 #if defined(PETSC_USE_DEBUG)
3871   for (ii=0; ii<baij->i[m]; ii++) {
3872     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3873     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3874   }
3875 #endif
3876 
3877   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   PetscFunctionReturn(0);
3880 }
3881