xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8b83055f287c8deb1a16325ae3faee24d0648b7a)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,x1,s1;
133   const PetscScalar *b;
134   const MatScalar   *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode    ierr;
136   PetscInt          m = a->mbs,i,i2,nz,j;
137   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0]   = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v  = aa + ai[i];
162         vi = aj + ai[i];
163         nz = diag[i] - ai[i];
164         s1 = b[i2];
165         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
166         x[i2]  = idiag[0]*s1;
167         idiag += 1;
168         i2    += 1;
169       }
170       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
171       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
172     }
173     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
174         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
175       i2    = 0;
176       mdiag = a->idiag+a->mbs;
177       for (i=0; i<m; i++) {
178         x1     = x[i2];
179         x[i2]  = mdiag[0]*x1;
180         mdiag += 1;
181         i2    += 1;
182       }
183       ierr = PetscLogFlops(m);CHKERRQ(ierr);
184     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
185       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
186     }
187     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
188       idiag  = a->idiag+a->mbs - 1;
189       i2     = m - 1;
190       x1     = x[i2];
191       x[i2]  = idiag[0]*x1;
192       idiag -= 1;
193       i2    -= 1;
194       for (i=m-2; i>=0; i--) {
195         v  = aa + (diag[i]+1);
196         vi = aj + diag[i] + 1;
197         nz = ai[i+1] - diag[i] - 1;
198         s1 = x[i2];
199         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
200         x[i2]  = idiag[0]*s1;
201         idiag -= 1;
202         i2    -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215 {
216   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
217   PetscScalar       *x,x1,x2,s1,s2;
218   const PetscScalar *b;
219   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
220   PetscErrorCode    ierr;
221   PetscInt          m = a->mbs,i,i2,nz,idx,j,it;
222   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
223 
224   PetscFunctionBegin;
225   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
226   its = its*lits;
227   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
228   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
229   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
230   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
231   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
232 
233   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
234 
235   diag  = a->diag;
236   idiag = a->idiag;
237   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
238   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
239 
240   if (flag & SOR_ZERO_INITIAL_GUESS) {
241     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
242       x[0]   = b[0]*idiag[0] + b[1]*idiag[2];
243       x[1]   = b[0]*idiag[1] + b[1]*idiag[3];
244       i2     = 2;
245       idiag += 4;
246       for (i=1; i<m; i++) {
247         v  = aa + 4*ai[i];
248         vi = aj + ai[i];
249         nz = diag[i] - ai[i];
250         s1 = b[i2]; s2 = b[i2+1];
251         for (j=0; j<nz; j++) {
252           idx = 2*vi[j];
253           it  = 4*j;
254           x1  = x[idx]; x2 = x[1+idx];
255           s1 -= v[it]*x1 + v[it+2]*x2;
256           s2 -= v[it+1]*x1 + v[it+3]*x2;
257         }
258         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
259         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
260         idiag  += 4;
261         i2     += 2;
262       }
263       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
264       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
265     }
266     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
267         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
268       i2    = 0;
269       mdiag = a->idiag+4*a->mbs;
270       for (i=0; i<m; i++) {
271         x1      = x[i2]; x2 = x[i2+1];
272         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
273         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
274         mdiag  += 4;
275         i2     += 2;
276       }
277       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
278     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
279       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
280     }
281     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
282       idiag   = a->idiag+4*a->mbs - 4;
283       i2      = 2*m - 2;
284       x1      = x[i2]; x2 = x[i2+1];
285       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
286       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
287       idiag  -= 4;
288       i2     -= 2;
289       for (i=m-2; i>=0; i--) {
290         v  = aa + 4*(diag[i]+1);
291         vi = aj + diag[i] + 1;
292         nz = ai[i+1] - diag[i] - 1;
293         s1 = x[i2]; s2 = x[i2+1];
294         for (j=0; j<nz; j++) {
295           idx = 2*vi[j];
296           it  = 4*j;
297           x1  = x[idx]; x2 = x[1+idx];
298           s1 -= v[it]*x1 + v[it+2]*x2;
299           s2 -= v[it+1]*x1 + v[it+3]*x2;
300         }
301         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
302         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
303         idiag  -= 4;
304         i2     -= 2;
305       }
306       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
307     }
308   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
317 {
318   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
319   PetscScalar       *x,x1,x2,x3,s1,s2,s3;
320   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
321   const PetscScalar *b;
322   PetscErrorCode    ierr;
323   PetscInt          m = a->mbs,i,i2,nz,idx;
324   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
325 
326   PetscFunctionBegin;
327   its = its*lits;
328   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
329   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
334 
335   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
336 
337   diag  = a->diag;
338   idiag = a->idiag;
339   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
341 
342   if (flag & SOR_ZERO_INITIAL_GUESS) {
343     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
344       x[0]   = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345       x[1]   = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346       x[2]   = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347       i2     = 3;
348       idiag += 9;
349       for (i=1; i<m; i++) {
350         v  = aa + 9*ai[i];
351         vi = aj + ai[i];
352         nz = diag[i] - ai[i];
353         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354         while (nz--) {
355           idx = 3*(*vi++);
356           x1  = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360           v  += 9;
361         }
362         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365         idiag  += 9;
366         i2     += 3;
367       }
368       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
370     }
371     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373       i2    = 0;
374       mdiag = a->idiag+9*a->mbs;
375       for (i=0; i<m; i++) {
376         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380         mdiag  += 9;
381         i2     += 3;
382       }
383       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
384     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
386     }
387     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
388       idiag   = a->idiag+9*a->mbs - 9;
389       i2      = 3*m - 3;
390       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394       idiag  -= 9;
395       i2     -= 3;
396       for (i=m-2; i>=0; i--) {
397         v  = aa + 9*(diag[i]+1);
398         vi = aj + diag[i] + 1;
399         nz = ai[i+1] - diag[i] - 1;
400         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401         while (nz--) {
402           idx = 3*(*vi++);
403           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407           v  += 9;
408         }
409         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412         idiag  -= 9;
413         i2     -= 3;
414       }
415       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
416     }
417   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
418   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
419   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
426 {
427   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
428   PetscScalar       *x,x1,x2,x3,x4,s1,s2,s3,s4;
429   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
430   const PetscScalar *b;
431   PetscErrorCode    ierr;
432   PetscInt          m = a->mbs,i,i2,nz,idx;
433   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
434 
435   PetscFunctionBegin;
436   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
437   its = its*lits;
438   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
439   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
440   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
441   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
442   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
443 
444   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
445 
446   diag  = a->diag;
447   idiag = a->idiag;
448   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
449   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
450 
451   if (flag & SOR_ZERO_INITIAL_GUESS) {
452     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
453       x[0]   = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
454       x[1]   = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
455       x[2]   = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
456       x[3]   = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
457       i2     = 4;
458       idiag += 16;
459       for (i=1; i<m; i++) {
460         v  = aa + 16*ai[i];
461         vi = aj + ai[i];
462         nz = diag[i] - ai[i];
463         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
464         while (nz--) {
465           idx = 4*(*vi++);
466           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
467           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
468           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
469           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
470           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
471           v  += 16;
472         }
473         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
474         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
475         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
476         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
477         idiag  += 16;
478         i2     += 4;
479       }
480       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
481       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
482     }
483     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
484         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
485       i2    = 0;
486       mdiag = a->idiag+16*a->mbs;
487       for (i=0; i<m; i++) {
488         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
489         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
490         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
491         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
492         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
493         mdiag  += 16;
494         i2     += 4;
495       }
496       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
497     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
498       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
499     }
500     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
501       idiag   = a->idiag+16*a->mbs - 16;
502       i2      = 4*m - 4;
503       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
504       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
505       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
506       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
507       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
508       idiag  -= 16;
509       i2     -= 4;
510       for (i=m-2; i>=0; i--) {
511         v  = aa + 16*(diag[i]+1);
512         vi = aj + diag[i] + 1;
513         nz = ai[i+1] - diag[i] - 1;
514         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
515         while (nz--) {
516           idx = 4*(*vi++);
517           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
518           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
519           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
520           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
521           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
522           v  += 16;
523         }
524         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
525         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
526         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
527         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
528         idiag  -= 16;
529         i2     -= 4;
530       }
531       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
532     }
533   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
534   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   PetscScalar       *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
545   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
546   const PetscScalar *b;
547   PetscErrorCode    ierr;
548   PetscInt          m = a->mbs,i,i2,nz,idx;
549   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
550 
551   PetscFunctionBegin;
552   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
553   its = its*lits;
554   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
555   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
556   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
557   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
558   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
559 
560   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
561 
562   diag  = a->diag;
563   idiag = a->idiag;
564   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
565   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
566 
567   if (flag & SOR_ZERO_INITIAL_GUESS) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       x[0]   = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
570       x[1]   = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
571       x[2]   = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
572       x[3]   = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
573       x[4]   = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
574       i2     = 5;
575       idiag += 25;
576       for (i=1; i<m; i++) {
577         v  = aa + 25*ai[i];
578         vi = aj + ai[i];
579         nz = diag[i] - ai[i];
580         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
581         while (nz--) {
582           idx = 5*(*vi++);
583           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
584           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
585           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
586           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
587           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
588           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
589           v  += 25;
590         }
591         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
592         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
593         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
594         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
595         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
596         idiag  += 25;
597         i2     += 5;
598       }
599       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
600       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
601     }
602     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
603         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
604       i2    = 0;
605       mdiag = a->idiag+25*a->mbs;
606       for (i=0; i<m; i++) {
607         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
608         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
609         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
610         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
611         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
612         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
613         mdiag  += 25;
614         i2     += 5;
615       }
616       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
617     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
618       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
619     }
620     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
621       idiag   = a->idiag+25*a->mbs - 25;
622       i2      = 5*m - 5;
623       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
624       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
625       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
626       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
627       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
628       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
629       idiag  -= 25;
630       i2     -= 5;
631       for (i=m-2; i>=0; i--) {
632         v  = aa + 25*(diag[i]+1);
633         vi = aj + diag[i] + 1;
634         nz = ai[i+1] - diag[i] - 1;
635         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
636         while (nz--) {
637           idx = 5*(*vi++);
638           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
639           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
640           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
641           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
642           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
643           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
644           v  += 25;
645         }
646         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
647         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
648         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
649         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
650         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
651         idiag  -= 25;
652         i2     -= 5;
653       }
654       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
655     }
656   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
665 {
666   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
667   PetscScalar       *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
668   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
669   const PetscScalar *b;
670   PetscErrorCode    ierr;
671   PetscInt          m = a->mbs,i,i2,nz,idx;
672   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
673 
674   PetscFunctionBegin;
675   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
676   its = its*lits;
677   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
678   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
679   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
680   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
681   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
682 
683   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
684 
685   diag  = a->diag;
686   idiag = a->idiag;
687   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
688   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
689 
690   if (flag & SOR_ZERO_INITIAL_GUESS) {
691     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
692       x[0]   = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
693       x[1]   = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
694       x[2]   = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
695       x[3]   = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
696       x[4]   = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
697       x[5]   = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
698       i2     = 6;
699       idiag += 36;
700       for (i=1; i<m; i++) {
701         v  = aa + 36*ai[i];
702         vi = aj + ai[i];
703         nz = diag[i] - ai[i];
704         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
705         while (nz--) {
706           idx = 6*(*vi++);
707           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
708           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
709           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
710           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
711           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
712           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
713           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
714           v  += 36;
715         }
716         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
717         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
718         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
719         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
720         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
721         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
722         idiag  += 36;
723         i2     += 6;
724       }
725       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
726       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
727     }
728     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
729         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
730       i2    = 0;
731       mdiag = a->idiag+36*a->mbs;
732       for (i=0; i<m; i++) {
733         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
734         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
735         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
736         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
737         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
738         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
739         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
740         mdiag  += 36;
741         i2     += 6;
742       }
743       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
744     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
745       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
746     }
747     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748       idiag   = a->idiag+36*a->mbs - 36;
749       i2      = 6*m - 6;
750       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
751       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
752       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
753       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
754       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
755       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
756       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
757       idiag  -= 36;
758       i2     -= 6;
759       for (i=m-2; i>=0; i--) {
760         v  = aa + 36*(diag[i]+1);
761         vi = aj + diag[i] + 1;
762         nz = ai[i+1] - diag[i] - 1;
763         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
764         while (nz--) {
765           idx = 6*(*vi++);
766           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
767           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
768           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
769           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
770           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
771           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
772           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
773           v  += 36;
774         }
775         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
776         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
777         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
778         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
779         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
780         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
781         idiag  -= 36;
782         i2     -= 6;
783       }
784       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
785     }
786   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
787   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
788   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
795 {
796   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
797   PetscScalar       *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
798   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
799   const PetscScalar *b;
800   PetscErrorCode    ierr;
801   PetscInt          m = a->mbs,i,i2,nz,idx;
802   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
803 
804   PetscFunctionBegin;
805   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
806   its = its*lits;
807   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
808   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
809   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
810   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
811   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
812 
813   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
814 
815   diag  = a->diag;
816   idiag = a->idiag;
817   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
818   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
819 
820   if (flag & SOR_ZERO_INITIAL_GUESS) {
821     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
822       x[0]   = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
823       x[1]   = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
824       x[2]   = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
825       x[3]   = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
826       x[4]   = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
827       x[5]   = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
828       x[6]   = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
829       i2     = 7;
830       idiag += 49;
831       for (i=1; i<m; i++) {
832         v  = aa + 49*ai[i];
833         vi = aj + ai[i];
834         nz = diag[i] - ai[i];
835         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
836         while (nz--) {
837           idx = 7*(*vi++);
838           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
839           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
840           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
841           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
842           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
843           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
844           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
845           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
846           v  += 49;
847         }
848         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
849         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
850         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
851         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
852         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
853         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
854         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
855         idiag  += 49;
856         i2     += 7;
857       }
858       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
859       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
860     }
861     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
862         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
863       i2    = 0;
864       mdiag = a->idiag+49*a->mbs;
865       for (i=0; i<m; i++) {
866         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
867         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
868         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
869         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
870         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
871         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
872         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
873         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
874         mdiag  += 49;
875         i2     += 7;
876       }
877       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
878     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
879       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
880     }
881     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882       idiag   = a->idiag+49*a->mbs - 49;
883       i2      = 7*m - 7;
884       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
885       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
886       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
887       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
888       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
889       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
890       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
891       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
892       idiag  -= 49;
893       i2     -= 7;
894       for (i=m-2; i>=0; i--) {
895         v  = aa + 49*(diag[i]+1);
896         vi = aj + diag[i] + 1;
897         nz = ai[i+1] - diag[i] - 1;
898         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
899         while (nz--) {
900           idx = 7*(*vi++);
901           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
902           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
903           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
904           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
905           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
906           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
907           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
908           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
909           v  += 49;
910         }
911         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
912         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
913         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
914         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
915         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
916         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
917         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
918         idiag  -= 49;
919         i2     -= 7;
920       }
921       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
922     }
923   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
932 {
933   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
934   PetscScalar       *x,*work,*w,*workt;
935   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
936   const PetscScalar *b;
937   PetscErrorCode    ierr;
938   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
939   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
940 
941   PetscFunctionBegin;
942   its = its*lits;
943   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
944   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
945   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
946   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
947   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
948   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
949 
950   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
951 
952   diag  = a->diag;
953   idiag = a->idiag;
954   if (!a->mult_work) {
955     k    = PetscMax(A->rmap->n,A->cmap->n);
956     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
957   }
958   work = a->mult_work;
959   if (!a->sor_work) {
960     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
961   }
962   w = a->sor_work;
963 
964   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
966 
967   if (flag & SOR_ZERO_INITIAL_GUESS) {
968     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
969       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
970       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
971       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
972       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
973       i2     = bs;
974       idiag += bs2;
975       for (i=1; i<m; i++) {
976         v  = aa + bs2*ai[i];
977         vi = aj + ai[i];
978         nz = diag[i] - ai[i];
979 
980         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
981         /* copy all rows of x that are needed into contiguous space */
982         workt = work;
983         for (j=0; j<nz; j++) {
984           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
985           workt += bs;
986         }
987         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
988         /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
989          while (nz--) {
990            idx  = N*(*vi++);
991            x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
992            s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
993            s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
994            s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
995            v   += N2;
996            } */
997 
998         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
999         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1000         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1001         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1002 
1003         idiag += bs2;
1004         i2    += bs;
1005       }
1006       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1007       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1008     }
1009     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1010         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1011       i2    = 0;
1012       mdiag = a->idiag+bs2*a->mbs;
1013       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1014       for (i=0; i<m; i++) {
1015         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1016         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1017         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1018         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1019         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1020 
1021         mdiag += bs2;
1022         i2    += bs;
1023       }
1024       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1025     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1026       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1027     }
1028     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029       idiag = a->idiag+bs2*a->mbs - bs2;
1030       i2    = bs*m - bs;
1031       ierr  = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1033       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1034       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1035       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1036       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1037       idiag -= bs2;
1038       i2    -= bs;
1039       for (i=m-2; i>=0; i--) {
1040         v  = aa + bs2*(diag[i]+1);
1041         vi = aj + diag[i] + 1;
1042         nz = ai[i+1] - diag[i] - 1;
1043 
1044         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1045         /* copy all rows of x that are needed into contiguous space */
1046         workt = work;
1047         for (j=0; j<nz; j++) {
1048           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049           workt += bs;
1050         }
1051         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1052         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1053         while (nz--) {
1054           idx  = N*(*vi++);
1055           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1056           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1057           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1058           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1059           v   += N2;
1060           } */
1061 
1062         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1063         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1064         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1065         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1066         idiag -= bs2;
1067         i2    -= bs;
1068       }
1069       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1070     }
1071   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /*
1078     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1079 */
1080 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1083 #define matsetvaluesblocked4_ matsetvaluesblocked4
1084 #endif
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "matsetvaluesblocked4_"
1088 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1089 {
1090   Mat               A  = *AA;
1091   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1092   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1093   PetscInt          *ai    =a->i,*ailen=a->ilen;
1094   PetscInt          *aj    =a->j,stepval,lastcol = -1;
1095   const PetscScalar *value = v;
1096   MatScalar         *ap,*aa = a->a,*bap;
1097 
1098   PetscFunctionBegin;
1099   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1100   stepval = (n-1)*4;
1101   for (k=0; k<m; k++) { /* loop over added rows */
1102     row  = im[k];
1103     rp   = aj + ai[row];
1104     ap   = aa + 16*ai[row];
1105     nrow = ailen[row];
1106     low  = 0;
1107     high = nrow;
1108     for (l=0; l<n; l++) { /* loop over added columns */
1109       col = in[l];
1110       if (col <= lastcol)  low = 0;
1111       else                high = nrow;
1112       lastcol = col;
1113       value   = v + k*(stepval+4 + l)*4;
1114       while (high-low > 7) {
1115         t = (low+high)/2;
1116         if (rp[t] > col) high = t;
1117         else             low  = t;
1118       }
1119       for (i=low; i<high; i++) {
1120         if (rp[i] > col) break;
1121         if (rp[i] == col) {
1122           bap = ap +  16*i;
1123           for (ii=0; ii<4; ii++,value+=stepval) {
1124             for (jj=ii; jj<16; jj+=4) {
1125               bap[jj] += *value++;
1126             }
1127           }
1128           goto noinsert2;
1129         }
1130       }
1131       N = nrow++ - 1;
1132       high++; /* added new column index thus must search to one higher than before */
1133       /* shift up all the later entries in this row */
1134       for (ii=N; ii>=i; ii--) {
1135         rp[ii+1] = rp[ii];
1136         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1137       }
1138       if (N >= i) {
1139         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1140       }
1141       rp[i] = col;
1142       bap   = ap +  16*i;
1143       for (ii=0; ii<4; ii++,value+=stepval) {
1144         for (jj=ii; jj<16; jj+=4) {
1145           bap[jj] = *value++;
1146         }
1147       }
1148       noinsert2:;
1149       low = i;
1150     }
1151     ailen[row] = nrow;
1152   }
1153   PetscFunctionReturnVoid();
1154 }
1155 
1156 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1157 #define matsetvalues4_ MATSETVALUES4
1158 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1159 #define matsetvalues4_ matsetvalues4
1160 #endif
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatSetValues4_"
1164 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1165 {
1166   Mat         A  = *AA;
1167   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1168   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1169   PetscInt    *ai=a->i,*ailen=a->ilen;
1170   PetscInt    *aj=a->j,brow,bcol;
1171   PetscInt    ridx,cidx,lastcol = -1;
1172   MatScalar   *ap,value,*aa=a->a,*bap;
1173 
1174   PetscFunctionBegin;
1175   for (k=0; k<m; k++) { /* loop over added rows */
1176     row  = im[k]; brow = row/4;
1177     rp   = aj + ai[brow];
1178     ap   = aa + 16*ai[brow];
1179     nrow = ailen[brow];
1180     low  = 0;
1181     high = nrow;
1182     for (l=0; l<n; l++) { /* loop over added columns */
1183       col   = in[l]; bcol = col/4;
1184       ridx  = row % 4; cidx = col % 4;
1185       value = v[l + k*n];
1186       if (col <= lastcol)  low = 0;
1187       else                high = nrow;
1188       lastcol = col;
1189       while (high-low > 7) {
1190         t = (low+high)/2;
1191         if (rp[t] > bcol) high = t;
1192         else              low  = t;
1193       }
1194       for (i=low; i<high; i++) {
1195         if (rp[i] > bcol) break;
1196         if (rp[i] == bcol) {
1197           bap   = ap +  16*i + 4*cidx + ridx;
1198           *bap += value;
1199           goto noinsert1;
1200         }
1201       }
1202       N = nrow++ - 1;
1203       high++; /* added new column thus must search to one higher than before */
1204       /* shift up all the later entries in this row */
1205       for (ii=N; ii>=i; ii--) {
1206         rp[ii+1] = rp[ii];
1207         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1208       }
1209       if (N>=i) {
1210         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1211       }
1212       rp[i]                    = bcol;
1213       ap[16*i + 4*cidx + ridx] = value;
1214 noinsert1:;
1215       low = i;
1216     }
1217     ailen[brow] = nrow;
1218   }
1219   PetscFunctionReturnVoid();
1220 }
1221 
1222 /*
1223      Checks for missing diagonals
1224 */
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1227 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1228 {
1229   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1230   PetscErrorCode ierr;
1231   PetscInt       *diag,*jj = a->j,i;
1232 
1233   PetscFunctionBegin;
1234   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1235   *missing = PETSC_FALSE;
1236   if (A->rmap->n > 0 && !jj) {
1237     *missing = PETSC_TRUE;
1238     if (d) *d = 0;
1239     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1240   } else {
1241     diag = a->diag;
1242     for (i=0; i<a->mbs; i++) {
1243       if (jj[diag[i]] != i) {
1244         *missing = PETSC_TRUE;
1245         if (d) *d = i;
1246         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1247         break;
1248       }
1249     }
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1256 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1257 {
1258   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1259   PetscErrorCode ierr;
1260   PetscInt       i,j,m = a->mbs;
1261 
1262   PetscFunctionBegin;
1263   if (!a->diag) {
1264     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1265     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1266     a->free_diag = PETSC_TRUE;
1267   }
1268   for (i=0; i<m; i++) {
1269     a->diag[i] = a->i[i+1];
1270     for (j=a->i[i]; j<a->i[i+1]; j++) {
1271       if (a->j[j] == i) {
1272         a->diag[i] = j;
1273         break;
1274       }
1275     }
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1283 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1284 {
1285   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1286   PetscErrorCode ierr;
1287   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1288   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1289 
1290   PetscFunctionBegin;
1291   *nn = n;
1292   if (!ia) PetscFunctionReturn(0);
1293   if (symmetric) {
1294     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1295     nz   = tia[n];
1296   } else {
1297     tia = a->i; tja = a->j;
1298   }
1299 
1300   if (!blockcompressed && bs > 1) {
1301     (*nn) *= bs;
1302     /* malloc & create the natural set of indices */
1303     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1304     if (n) {
1305       (*ia)[0] = 0;
1306       for (j=1; j<bs; j++) {
1307         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1308       }
1309     }
1310 
1311     for (i=1; i<n; i++) {
1312       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1313       for (j=1; j<bs; j++) {
1314         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1315       }
1316     }
1317     if (n) {
1318       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1319     }
1320 
1321     if (inja) {
1322       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1323       cnt = 0;
1324       for (i=0; i<n; i++) {
1325         for (j=0; j<bs; j++) {
1326           for (k=tia[i]; k<tia[i+1]; k++) {
1327             for (l=0; l<bs; l++) {
1328               (*ja)[cnt++] = bs*tja[k] + l;
1329             }
1330           }
1331         }
1332       }
1333     }
1334 
1335     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1336       ierr = PetscFree(tia);CHKERRQ(ierr);
1337       ierr = PetscFree(tja);CHKERRQ(ierr);
1338     }
1339   } else if (oshift == 1) {
1340     if (symmetric) {
1341       nz = tia[A->rmap->n/bs];
1342       /*  add 1 to i and j indices */
1343       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1344       *ia = tia;
1345       if (ja) {
1346         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1347         *ja = tja;
1348       }
1349     } else {
1350       nz = a->i[A->rmap->n/bs];
1351       /* malloc space and  add 1 to i and j indices */
1352       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1353       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1354       if (ja) {
1355         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1356         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1357       }
1358     }
1359   } else {
1360     *ia = tia;
1361     if (ja) *ja = tja;
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1368 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   if (!ia) PetscFunctionReturn(0);
1374   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1375     ierr = PetscFree(*ia);CHKERRQ(ierr);
1376     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1377   }
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1383 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1384 {
1385   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1386   PetscErrorCode ierr;
1387 
1388   PetscFunctionBegin;
1389 #if defined(PETSC_USE_LOG)
1390   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1391 #endif
1392   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1393   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1394   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1395   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1396   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1397   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1398   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1399   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1400   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1401   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1402   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1403   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1404   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1405 
1406   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1407   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1408   ierr = PetscFree(A->data);CHKERRQ(ierr);
1409 
1410   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1411   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1412   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1413   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1414   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1415   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1416   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1417   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1426 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1427 {
1428   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   switch (op) {
1433   case MAT_ROW_ORIENTED:
1434     a->roworiented = flg;
1435     break;
1436   case MAT_KEEP_NONZERO_PATTERN:
1437     a->keepnonzeropattern = flg;
1438     break;
1439   case MAT_NEW_NONZERO_LOCATIONS:
1440     a->nonew = (flg ? 0 : 1);
1441     break;
1442   case MAT_NEW_NONZERO_LOCATION_ERR:
1443     a->nonew = (flg ? -1 : 0);
1444     break;
1445   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1446     a->nonew = (flg ? -2 : 0);
1447     break;
1448   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1449     a->nounused = (flg ? -1 : 0);
1450     break;
1451   case MAT_CHECK_COMPRESSED_ROW:
1452     a->compressedrow.check = flg;
1453     break;
1454   case MAT_NEW_DIAGONALS:
1455   case MAT_IGNORE_OFF_PROC_ENTRIES:
1456   case MAT_USE_HASH_TABLE:
1457     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1458     break;
1459   case MAT_SPD:
1460   case MAT_SYMMETRIC:
1461   case MAT_STRUCTURALLY_SYMMETRIC:
1462   case MAT_HERMITIAN:
1463   case MAT_SYMMETRY_ETERNAL:
1464     /* These options are handled directly by MatSetOption() */
1465     break;
1466   default:
1467     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1474 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1475 {
1476   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1477   PetscErrorCode ierr;
1478   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1479   MatScalar      *aa,*aa_i;
1480   PetscScalar    *v_i;
1481 
1482   PetscFunctionBegin;
1483   bs  = A->rmap->bs;
1484   ai  = a->i;
1485   aj  = a->j;
1486   aa  = a->a;
1487   bs2 = a->bs2;
1488 
1489   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1490 
1491   bn  = row/bs;   /* Block number */
1492   bp  = row % bs; /* Block Position */
1493   M   = ai[bn+1] - ai[bn];
1494   *nz = bs*M;
1495 
1496   if (v) {
1497     *v = 0;
1498     if (*nz) {
1499       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1500       for (i=0; i<M; i++) { /* for each block in the block row */
1501         v_i  = *v + i*bs;
1502         aa_i = aa + bs2*(ai[bn] + i);
1503         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1504       }
1505     }
1506   }
1507 
1508   if (idx) {
1509     *idx = 0;
1510     if (*nz) {
1511       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1512       for (i=0; i<M; i++) { /* for each block in the block row */
1513         idx_i = *idx + i*bs;
1514         itmp  = bs*aj[ai[bn] + i];
1515         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1516       }
1517     }
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1524 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1525 {
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1530   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1538 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1539 {
1540   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1541   Mat            C;
1542   PetscErrorCode ierr;
1543   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1544   PetscInt       *rows,*cols,bs2=a->bs2;
1545   MatScalar      *array;
1546 
1547   PetscFunctionBegin;
1548   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1549   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1550     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1551     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1552 
1553     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1554     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1555     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1556     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1557     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1558     ierr = PetscFree(col);CHKERRQ(ierr);
1559   } else {
1560     C = *B;
1561   }
1562 
1563   array = a->a;
1564   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1565   for (i=0; i<mbs; i++) {
1566     cols[0] = i*bs;
1567     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1568     len = ai[i+1] - ai[i];
1569     for (j=0; j<len; j++) {
1570       rows[0] = (*aj++)*bs;
1571       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1572       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1573       array += bs2;
1574     }
1575   }
1576   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1577 
1578   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1579   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1580 
1581   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1582     *B = C;
1583   } else {
1584     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1591 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1592 {
1593   PetscErrorCode ierr;
1594   Mat            Btrans;
1595 
1596   PetscFunctionBegin;
1597   *f   = PETSC_FALSE;
1598   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1599   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1600   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1606 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1607 {
1608   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1609   PetscErrorCode ierr;
1610   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1611   int            fd;
1612   PetscScalar    *aa;
1613   FILE           *file;
1614 
1615   PetscFunctionBegin;
1616   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1617   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1618   col_lens[0] = MAT_FILE_CLASSID;
1619 
1620   col_lens[1] = A->rmap->N;
1621   col_lens[2] = A->cmap->n;
1622   col_lens[3] = a->nz*bs2;
1623 
1624   /* store lengths of each row and write (including header) to file */
1625   count = 0;
1626   for (i=0; i<a->mbs; i++) {
1627     for (j=0; j<bs; j++) {
1628       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1629     }
1630   }
1631   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1632   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1633 
1634   /* store column indices (zero start index) */
1635   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1636   count = 0;
1637   for (i=0; i<a->mbs; i++) {
1638     for (j=0; j<bs; j++) {
1639       for (k=a->i[i]; k<a->i[i+1]; k++) {
1640         for (l=0; l<bs; l++) {
1641           jj[count++] = bs*a->j[k] + l;
1642         }
1643       }
1644     }
1645   }
1646   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1647   ierr = PetscFree(jj);CHKERRQ(ierr);
1648 
1649   /* store nonzero values */
1650   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1651   count = 0;
1652   for (i=0; i<a->mbs; i++) {
1653     for (j=0; j<bs; j++) {
1654       for (k=a->i[i]; k<a->i[i+1]; k++) {
1655         for (l=0; l<bs; l++) {
1656           aa[count++] = a->a[bs2*k + l*bs + j];
1657         }
1658       }
1659     }
1660   }
1661   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1662   ierr = PetscFree(aa);CHKERRQ(ierr);
1663 
1664   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1665   if (file) {
1666     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1667   }
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1673 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1674 {
1675   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1676   PetscErrorCode    ierr;
1677   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1678   PetscViewerFormat format;
1679 
1680   PetscFunctionBegin;
1681   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1682   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1683     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1684   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1685     Mat aij;
1686     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1687     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1688     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1689   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1690       PetscFunctionReturn(0);
1691   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1692     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1693     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1694     for (i=0; i<a->mbs; i++) {
1695       for (j=0; j<bs; j++) {
1696         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1697         for (k=a->i[i]; k<a->i[i+1]; k++) {
1698           for (l=0; l<bs; l++) {
1699 #if defined(PETSC_USE_COMPLEX)
1700             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1701               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1702                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1703             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1704               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1705                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1706             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1707               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1708             }
1709 #else
1710             if (a->a[bs2*k + l*bs + j] != 0.0) {
1711               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1712             }
1713 #endif
1714           }
1715         }
1716         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1717       }
1718     }
1719     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1720   } else {
1721     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1722     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1723     for (i=0; i<a->mbs; i++) {
1724       for (j=0; j<bs; j++) {
1725         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1726         for (k=a->i[i]; k<a->i[i+1]; k++) {
1727           for (l=0; l<bs; l++) {
1728 #if defined(PETSC_USE_COMPLEX)
1729             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1731                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1734                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1735             } else {
1736               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1737             }
1738 #else
1739             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1740 #endif
1741           }
1742         }
1743         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1744       }
1745     }
1746     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1747   }
1748   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 #include <petscdraw.h>
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1755 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1756 {
1757   Mat               A = (Mat) Aa;
1758   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1759   PetscErrorCode    ierr;
1760   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1761   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1762   MatScalar         *aa;
1763   PetscViewer       viewer;
1764   PetscViewerFormat format;
1765 
1766   PetscFunctionBegin;
1767   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1768   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1769 
1770   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1771 
1772   /* loop over matrix elements drawing boxes */
1773 
1774   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1775     color = PETSC_DRAW_BLUE;
1776     for (i=0,row=0; i<mbs; i++,row+=bs) {
1777       for (j=a->i[i]; j<a->i[i+1]; j++) {
1778         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1779         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1780         aa  = a->a + j*bs2;
1781         for (k=0; k<bs; k++) {
1782           for (l=0; l<bs; l++) {
1783             if (PetscRealPart(*aa++) >=  0.) continue;
1784             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1785           }
1786         }
1787       }
1788     }
1789     color = PETSC_DRAW_CYAN;
1790     for (i=0,row=0; i<mbs; i++,row+=bs) {
1791       for (j=a->i[i]; j<a->i[i+1]; j++) {
1792         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1793         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1794         aa  = a->a + j*bs2;
1795         for (k=0; k<bs; k++) {
1796           for (l=0; l<bs; l++) {
1797             if (PetscRealPart(*aa++) != 0.) continue;
1798             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1799           }
1800         }
1801       }
1802     }
1803     color = PETSC_DRAW_RED;
1804     for (i=0,row=0; i<mbs; i++,row+=bs) {
1805       for (j=a->i[i]; j<a->i[i+1]; j++) {
1806         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1807         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1808         aa  = a->a + j*bs2;
1809         for (k=0; k<bs; k++) {
1810           for (l=0; l<bs; l++) {
1811             if (PetscRealPart(*aa++) <= 0.) continue;
1812             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1813           }
1814         }
1815       }
1816     }
1817   } else {
1818     /* use contour shading to indicate magnitude of values */
1819     /* first determine max of all nonzero values */
1820     PetscDraw popup;
1821     PetscReal scale,maxv = 0.0;
1822 
1823     for (i=0; i<a->nz*a->bs2; i++) {
1824       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1825     }
1826     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1827     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1828     if (popup) {
1829       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1830     }
1831     for (i=0,row=0; i<mbs; i++,row+=bs) {
1832       for (j=a->i[i]; j<a->i[i+1]; j++) {
1833         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835         aa  = a->a + j*bs2;
1836         for (k=0; k<bs; k++) {
1837           for (l=0; l<bs; l++) {
1838             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1839             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1840           }
1841         }
1842       }
1843     }
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1850 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1851 {
1852   PetscErrorCode ierr;
1853   PetscReal      xl,yl,xr,yr,w,h;
1854   PetscDraw      draw;
1855   PetscBool      isnull;
1856 
1857   PetscFunctionBegin;
1858   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1859   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1860 
1861   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1862   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1863   xr  += w;    yr += h;  xl = -w;     yl = -h;
1864   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1865   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1866   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatView_SeqBAIJ"
1872 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1873 {
1874   PetscErrorCode ierr;
1875   PetscBool      iascii,isbinary,isdraw;
1876 
1877   PetscFunctionBegin;
1878   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1879   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1880   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1881   if (iascii) {
1882     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1883   } else if (isbinary) {
1884     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1885   } else if (isdraw) {
1886     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1887   } else {
1888     Mat B;
1889     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1890     ierr = MatView(B,viewer);CHKERRQ(ierr);
1891     ierr = MatDestroy(&B);CHKERRQ(ierr);
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1899 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1900 {
1901   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1902   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1903   PetscInt    *ai = a->i,*ailen = a->ilen;
1904   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1905   MatScalar   *ap,*aa = a->a;
1906 
1907   PetscFunctionBegin;
1908   for (k=0; k<m; k++) { /* loop over rows */
1909     row = im[k]; brow = row/bs;
1910     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1911     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1912     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1913     nrow = ailen[brow];
1914     for (l=0; l<n; l++) { /* loop over columns */
1915       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1916       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1917       col  = in[l];
1918       bcol = col/bs;
1919       cidx = col%bs;
1920       ridx = row%bs;
1921       high = nrow;
1922       low  = 0; /* assume unsorted */
1923       while (high-low > 5) {
1924         t = (low+high)/2;
1925         if (rp[t] > bcol) high = t;
1926         else             low  = t;
1927       }
1928       for (i=low; i<high; i++) {
1929         if (rp[i] > bcol) break;
1930         if (rp[i] == bcol) {
1931           *v++ = ap[bs2*i+bs*cidx+ridx];
1932           goto finished;
1933         }
1934       }
1935       *v++ = 0.0;
1936 finished:;
1937     }
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1944 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1945 {
1946   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1947   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1948   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1949   PetscErrorCode    ierr;
1950   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1951   PetscBool         roworiented=a->roworiented;
1952   const PetscScalar *value     = v;
1953   MatScalar         *ap,*aa = a->a,*bap;
1954 
1955   PetscFunctionBegin;
1956   if (roworiented) {
1957     stepval = (n-1)*bs;
1958   } else {
1959     stepval = (m-1)*bs;
1960   }
1961   for (k=0; k<m; k++) { /* loop over added rows */
1962     row = im[k];
1963     if (row < 0) continue;
1964 #if defined(PETSC_USE_DEBUG)
1965     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1966 #endif
1967     rp   = aj + ai[row];
1968     ap   = aa + bs2*ai[row];
1969     rmax = imax[row];
1970     nrow = ailen[row];
1971     low  = 0;
1972     high = nrow;
1973     for (l=0; l<n; l++) { /* loop over added columns */
1974       if (in[l] < 0) continue;
1975 #if defined(PETSC_USE_DEBUG)
1976       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1977 #endif
1978       col = in[l];
1979       if (roworiented) {
1980         value = v + (k*(stepval+bs) + l)*bs;
1981       } else {
1982         value = v + (l*(stepval+bs) + k)*bs;
1983       }
1984       if (col <= lastcol) low = 0;
1985       else high = nrow;
1986       lastcol = col;
1987       while (high-low > 7) {
1988         t = (low+high)/2;
1989         if (rp[t] > col) high = t;
1990         else             low  = t;
1991       }
1992       for (i=low; i<high; i++) {
1993         if (rp[i] > col) break;
1994         if (rp[i] == col) {
1995           bap = ap +  bs2*i;
1996           if (roworiented) {
1997             if (is == ADD_VALUES) {
1998               for (ii=0; ii<bs; ii++,value+=stepval) {
1999                 for (jj=ii; jj<bs2; jj+=bs) {
2000                   bap[jj] += *value++;
2001                 }
2002               }
2003             } else {
2004               for (ii=0; ii<bs; ii++,value+=stepval) {
2005                 for (jj=ii; jj<bs2; jj+=bs) {
2006                   bap[jj] = *value++;
2007                 }
2008               }
2009             }
2010           } else {
2011             if (is == ADD_VALUES) {
2012               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2013                 for (jj=0; jj<bs; jj++) {
2014                   bap[jj] += value[jj];
2015                 }
2016                 bap += bs;
2017               }
2018             } else {
2019               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2020                 for (jj=0; jj<bs; jj++) {
2021                   bap[jj]  = value[jj];
2022                 }
2023                 bap += bs;
2024               }
2025             }
2026           }
2027           goto noinsert2;
2028         }
2029       }
2030       if (nonew == 1) goto noinsert2;
2031       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2032       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2033       N = nrow++ - 1; high++;
2034       /* shift up all the later entries in this row */
2035       for (ii=N; ii>=i; ii--) {
2036         rp[ii+1] = rp[ii];
2037         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2038       }
2039       if (N >= i) {
2040         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2041       }
2042       rp[i] = col;
2043       bap   = ap +  bs2*i;
2044       if (roworiented) {
2045         for (ii=0; ii<bs; ii++,value+=stepval) {
2046           for (jj=ii; jj<bs2; jj+=bs) {
2047             bap[jj] = *value++;
2048           }
2049         }
2050       } else {
2051         for (ii=0; ii<bs; ii++,value+=stepval) {
2052           for (jj=0; jj<bs; jj++) {
2053             *bap++ = *value++;
2054           }
2055         }
2056       }
2057 noinsert2:;
2058       low = i;
2059     }
2060     ailen[row] = nrow;
2061   }
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2067 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2068 {
2069   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
2070   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2071   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
2072   PetscErrorCode ierr;
2073   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
2074   MatScalar      *aa  = a->a,*ap;
2075   PetscReal      ratio=0.6;
2076 
2077   PetscFunctionBegin;
2078   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2079 
2080   if (m) rmax = ailen[0];
2081   for (i=1; i<mbs; i++) {
2082     /* move each row back by the amount of empty slots (fshift) before it*/
2083     fshift += imax[i-1] - ailen[i-1];
2084     rmax    = PetscMax(rmax,ailen[i]);
2085     if (fshift) {
2086       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2087       N  = ailen[i];
2088       for (j=0; j<N; j++) {
2089         ip[j-fshift] = ip[j];
2090 
2091         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2092       }
2093     }
2094     ai[i] = ai[i-1] + ailen[i-1];
2095   }
2096   if (mbs) {
2097     fshift += imax[mbs-1] - ailen[mbs-1];
2098     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2099   }
2100   /* reset ilen and imax for each row */
2101   for (i=0; i<mbs; i++) {
2102     ailen[i] = imax[i] = ai[i+1] - ai[i];
2103   }
2104   a->nz = ai[mbs];
2105 
2106   /* diagonals may have moved, so kill the diagonal pointers */
2107   a->idiagvalid = PETSC_FALSE;
2108   if (fshift && a->diag) {
2109     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2110     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2111     a->diag = 0;
2112   }
2113   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2114   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2115   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2116   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2117 
2118   A->info.mallocs    += a->reallocs;
2119   a->reallocs         = 0;
2120   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2121 
2122   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2123 
2124   A->same_nonzero = PETSC_TRUE;
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 /*
2129    This function returns an array of flags which indicate the locations of contiguous
2130    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2131    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2132    Assume: sizes should be long enough to hold all the values.
2133 */
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2136 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2137 {
2138   PetscInt  i,j,k,row;
2139   PetscBool flg;
2140 
2141   PetscFunctionBegin;
2142   for (i=0,j=0; i<n; j++) {
2143     row = idx[i];
2144     if (row%bs!=0) { /* Not the begining of a block */
2145       sizes[j] = 1;
2146       i++;
2147     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2148       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2149       i++;
2150     } else { /* Begining of the block, so check if the complete block exists */
2151       flg = PETSC_TRUE;
2152       for (k=1; k<bs; k++) {
2153         if (row+k != idx[i+k]) { /* break in the block */
2154           flg = PETSC_FALSE;
2155           break;
2156         }
2157       }
2158       if (flg) { /* No break in the bs */
2159         sizes[j] = bs;
2160         i       += bs;
2161       } else {
2162         sizes[j] = 1;
2163         i++;
2164       }
2165     }
2166   }
2167   *bs_max = j;
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2173 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2174 {
2175   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2176   PetscErrorCode    ierr;
2177   PetscInt          i,j,k,count,*rows;
2178   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2179   PetscScalar       zero = 0.0;
2180   MatScalar         *aa;
2181   const PetscScalar *xx;
2182   PetscScalar       *bb;
2183 
2184   PetscFunctionBegin;
2185   /* fix right hand side if needed */
2186   if (x && b) {
2187     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2188     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2189     for (i=0; i<is_n; i++) {
2190       bb[is_idx[i]] = diag*xx[is_idx[i]];
2191     }
2192     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2193     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2194   }
2195 
2196   /* Make a copy of the IS and  sort it */
2197   /* allocate memory for rows,sizes */
2198   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2199 
2200   /* copy IS values to rows, and sort them */
2201   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2202   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2203 
2204   if (baij->keepnonzeropattern) {
2205     for (i=0; i<is_n; i++) sizes[i] = 1;
2206     bs_max          = is_n;
2207     A->same_nonzero = PETSC_TRUE;
2208   } else {
2209     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2210 
2211     A->same_nonzero = PETSC_FALSE;
2212   }
2213 
2214   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2215     row = rows[j];
2216     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2217     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2218     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2219     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2220       if (diag != (PetscScalar)0.0) {
2221         if (baij->ilen[row/bs] > 0) {
2222           baij->ilen[row/bs]       = 1;
2223           baij->j[baij->i[row/bs]] = row/bs;
2224 
2225           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2226         }
2227         /* Now insert all the diagonal values for this bs */
2228         for (k=0; k<bs; k++) {
2229           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2230         }
2231       } else { /* (diag == 0.0) */
2232         baij->ilen[row/bs] = 0;
2233       } /* end (diag == 0.0) */
2234     } else { /* (sizes[i] != bs) */
2235 #if defined(PETSC_USE_DEBUG)
2236       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2237 #endif
2238       for (k=0; k<count; k++) {
2239         aa[0] =  zero;
2240         aa   += bs;
2241       }
2242       if (diag != (PetscScalar)0.0) {
2243         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2244       }
2245     }
2246   }
2247 
2248   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2249   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2255 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2256 {
2257   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2258   PetscErrorCode    ierr;
2259   PetscInt          i,j,k,count;
2260   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2261   PetscScalar       zero = 0.0;
2262   MatScalar         *aa;
2263   const PetscScalar *xx;
2264   PetscScalar       *bb;
2265   PetscBool         *zeroed,vecs = PETSC_FALSE;
2266 
2267   PetscFunctionBegin;
2268   /* fix right hand side if needed */
2269   if (x && b) {
2270     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2271     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2272     vecs = PETSC_TRUE;
2273   }
2274   A->same_nonzero = PETSC_TRUE;
2275 
2276   /* zero the columns */
2277   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2278   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2279   for (i=0; i<is_n; i++) {
2280     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2281     zeroed[is_idx[i]] = PETSC_TRUE;
2282   }
2283   for (i=0; i<A->rmap->N; i++) {
2284     if (!zeroed[i]) {
2285       row = i/bs;
2286       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2287         for (k=0; k<bs; k++) {
2288           col = bs*baij->j[j] + k;
2289           if (zeroed[col]) {
2290             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2291             if (vecs) bb[i] -= aa[0]*xx[col];
2292             aa[0] = 0.0;
2293           }
2294         }
2295       }
2296     } else if (vecs) bb[i] = diag*xx[i];
2297   }
2298   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2299   if (vecs) {
2300     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2301     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2302   }
2303 
2304   /* zero the rows */
2305   for (i=0; i<is_n; i++) {
2306     row   = is_idx[i];
2307     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2308     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2309     for (k=0; k<count; k++) {
2310       aa[0] =  zero;
2311       aa   += bs;
2312     }
2313     if (diag != (PetscScalar)0.0) {
2314       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2315     }
2316   }
2317   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2323 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2324 {
2325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2326   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2327   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2328   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2329   PetscErrorCode ierr;
2330   PetscInt       ridx,cidx,bs2=a->bs2;
2331   PetscBool      roworiented=a->roworiented;
2332   MatScalar      *ap,value,*aa=a->a,*bap;
2333 
2334   PetscFunctionBegin;
2335   if (v) PetscValidScalarPointer(v,6);
2336   for (k=0; k<m; k++) { /* loop over added rows */
2337     row  = im[k];
2338     brow = row/bs;
2339     if (row < 0) continue;
2340 #if defined(PETSC_USE_DEBUG)
2341     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2342 #endif
2343     rp   = aj + ai[brow];
2344     ap   = aa + bs2*ai[brow];
2345     rmax = imax[brow];
2346     nrow = ailen[brow];
2347     low  = 0;
2348     high = nrow;
2349     for (l=0; l<n; l++) { /* loop over added columns */
2350       if (in[l] < 0) continue;
2351 #if defined(PETSC_USE_DEBUG)
2352       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2353 #endif
2354       col  = in[l]; bcol = col/bs;
2355       ridx = row % bs; cidx = col % bs;
2356       if (roworiented) {
2357         value = v[l + k*n];
2358       } else {
2359         value = v[k + l*m];
2360       }
2361       if (col <= lastcol) low = 0; else high = nrow;
2362       lastcol = col;
2363       while (high-low > 7) {
2364         t = (low+high)/2;
2365         if (rp[t] > bcol) high = t;
2366         else              low  = t;
2367       }
2368       for (i=low; i<high; i++) {
2369         if (rp[i] > bcol) break;
2370         if (rp[i] == bcol) {
2371           bap = ap +  bs2*i + bs*cidx + ridx;
2372           if (is == ADD_VALUES) *bap += value;
2373           else                  *bap  = value;
2374           goto noinsert1;
2375         }
2376       }
2377       if (nonew == 1) goto noinsert1;
2378       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2379       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2380       N = nrow++ - 1; high++;
2381       /* shift up all the later entries in this row */
2382       for (ii=N; ii>=i; ii--) {
2383         rp[ii+1] = rp[ii];
2384         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2385       }
2386       if (N>=i) {
2387         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2388       }
2389       rp[i]                      = bcol;
2390       ap[bs2*i + bs*cidx + ridx] = value;
2391       a->nz++;
2392 noinsert1:;
2393       low = i;
2394     }
2395     ailen[brow] = nrow;
2396   }
2397   A->same_nonzero = PETSC_FALSE;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2403 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2404 {
2405   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2406   Mat            outA;
2407   PetscErrorCode ierr;
2408   PetscBool      row_identity,col_identity;
2409 
2410   PetscFunctionBegin;
2411   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2412   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2413   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2414   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2415 
2416   outA            = inA;
2417   inA->factortype = MAT_FACTOR_LU;
2418 
2419   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2420 
2421   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2422   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2423   a->row = row;
2424   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2425   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2426   a->col = col;
2427 
2428   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2429   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2430   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2431   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2432 
2433   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2434   if (!a->solve_work) {
2435     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2436     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2437   }
2438   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2444 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2445 {
2446   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2447   PetscInt    i,nz,mbs;
2448 
2449   PetscFunctionBegin;
2450   nz  = baij->maxnz;
2451   mbs = baij->mbs;
2452   for (i=0; i<nz; i++) {
2453     baij->j[i] = indices[i];
2454   }
2455   baij->nz = nz;
2456   for (i=0; i<mbs; i++) {
2457     baij->ilen[i] = baij->imax[i];
2458   }
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2464 /*@
2465     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2466        in the matrix.
2467 
2468   Input Parameters:
2469 +  mat - the SeqBAIJ matrix
2470 -  indices - the column indices
2471 
2472   Level: advanced
2473 
2474   Notes:
2475     This can be called if you have precomputed the nonzero structure of the
2476   matrix and want to provide it to the matrix object to improve the performance
2477   of the MatSetValues() operation.
2478 
2479     You MUST have set the correct numbers of nonzeros per row in the call to
2480   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2481 
2482     MUST be called before any calls to MatSetValues();
2483 
2484 @*/
2485 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2486 {
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
2490   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2491   PetscValidPointer(indices,2);
2492   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2498 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2499 {
2500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2501   PetscErrorCode ierr;
2502   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2503   PetscReal      atmp;
2504   PetscScalar    *x,zero = 0.0;
2505   MatScalar      *aa;
2506   PetscInt       ncols,brow,krow,kcol;
2507 
2508   PetscFunctionBegin;
2509   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2510   bs  = A->rmap->bs;
2511   aa  = a->a;
2512   ai  = a->i;
2513   aj  = a->j;
2514   mbs = a->mbs;
2515 
2516   ierr = VecSet(v,zero);CHKERRQ(ierr);
2517   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2518   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2519   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2520   for (i=0; i<mbs; i++) {
2521     ncols = ai[1] - ai[0]; ai++;
2522     brow  = bs*i;
2523     for (j=0; j<ncols; j++) {
2524       for (kcol=0; kcol<bs; kcol++) {
2525         for (krow=0; krow<bs; krow++) {
2526           atmp = PetscAbsScalar(*aa);aa++;
2527           row  = brow + krow;   /* row index */
2528           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2529           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2530         }
2531       }
2532       aj++;
2533     }
2534   }
2535   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatCopy_SeqBAIJ"
2541 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2542 {
2543   PetscErrorCode ierr;
2544 
2545   PetscFunctionBegin;
2546   /* If the two matrices have the same copy implementation, use fast copy. */
2547   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2548     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2549     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2550     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2551 
2552     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2553     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2554     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2555   } else {
2556     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2563 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2564 {
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2574 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2575 {
2576   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2577 
2578   PetscFunctionBegin;
2579   *array = a->a;
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 #undef __FUNCT__
2584 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2585 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2586 {
2587   PetscFunctionBegin;
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2593 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2594 {
2595   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2596   PetscErrorCode ierr;
2597   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2598   PetscBLASInt   one=1;
2599 
2600   PetscFunctionBegin;
2601   if (str == SAME_NONZERO_PATTERN) {
2602     PetscScalar  alpha = a;
2603     PetscBLASInt bnz;
2604     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2605     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2606   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2607     if (y->xtoy && y->XtoY != X) {
2608       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2609       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2610     }
2611     if (!y->xtoy) { /* get xtoy */
2612       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2613       y->XtoY = X;
2614       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2615     }
2616     for (i=0; i<x->nz; i++) {
2617       j = 0;
2618       while (j < bs2) {
2619         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2620         j++;
2621       }
2622     }
2623     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2624   } else {
2625     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2626   }
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2632 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2633 {
2634   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2635   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2636   MatScalar   *aa = a->a;
2637 
2638   PetscFunctionBegin;
2639   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2640   PetscFunctionReturn(0);
2641 }
2642 
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2645 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2646 {
2647   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2648   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2649   MatScalar   *aa = a->a;
2650 
2651   PetscFunctionBegin;
2652   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2657 
2658 #undef __FUNCT__
2659 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2660 /*
2661     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2662 */
2663 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2664 {
2665   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2666   PetscErrorCode ierr;
2667   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2668   PetscInt       nz = a->i[m],row,*jj,mr,col;
2669 
2670   PetscFunctionBegin;
2671   *nn = n;
2672   if (!ia) PetscFunctionReturn(0);
2673   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2674   else {
2675     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2676     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2677     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2678     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2679     jj   = a->j;
2680     for (i=0; i<nz; i++) {
2681       collengths[jj[i]]++;
2682     }
2683     cia[0] = oshift;
2684     for (i=0; i<n; i++) {
2685       cia[i+1] = cia[i] + collengths[i];
2686     }
2687     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2688     jj   = a->j;
2689     for (row=0; row<m; row++) {
2690       mr = a->i[row+1] - a->i[row];
2691       for (i=0; i<mr; i++) {
2692         col = *jj++;
2693 
2694         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2695       }
2696     }
2697     ierr = PetscFree(collengths);CHKERRQ(ierr);
2698     *ia  = cia; *ja = cja;
2699   }
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2705 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2706 {
2707   PetscErrorCode ierr;
2708 
2709   PetscFunctionBegin;
2710   if (!ia) PetscFunctionReturn(0);
2711   ierr = PetscFree(*ia);CHKERRQ(ierr);
2712   ierr = PetscFree(*ja);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2718 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2719 {
2720   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2721   PetscErrorCode ierr;
2722   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2723   PetscScalar    dx,*y,*xx,*w3_array;
2724   PetscScalar    *vscale_array;
2725   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2726   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2727   void           *fctx   = coloring->fctx;
2728   PetscBool      flg     = PETSC_FALSE;
2729   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2730   Vec            x1_tmp;
2731 
2732   PetscFunctionBegin;
2733   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2734   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2735   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2736   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2737 
2738   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2739   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2740   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2741   if (flg) {
2742     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2743   } else {
2744     PetscBool assembled;
2745     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2746     if (assembled) {
2747       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2748     }
2749   }
2750 
2751   x1_tmp = x1;
2752   if (!coloring->vscale) {
2753     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2754   }
2755 
2756   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2757     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2758   }
2759   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2760 
2761   /* Set w1 = F(x1) */
2762   if (!coloring->fset) {
2763     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2764     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2765     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2766   } else {
2767     coloring->fset = PETSC_FALSE;
2768   }
2769 
2770   if (!coloring->w3) {
2771     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2772     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2773   }
2774   w3 = coloring->w3;
2775 
2776   /* Compute all the local scale factors, including ghost points */
2777   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2778   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2779   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2780   if (ctype == IS_COLORING_GHOSTED) {
2781     col_start = 0; col_end = N;
2782   } else if (ctype == IS_COLORING_GLOBAL) {
2783     xx           = xx - start;
2784     vscale_array = vscale_array - start;
2785     col_start    = start; col_end = N + start;
2786   }
2787   for (col=col_start; col<col_end; col++) {
2788     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2789     if (coloring->htype[0] == 'w') {
2790       dx = 1.0 + unorm;
2791     } else {
2792       dx = xx[col];
2793     }
2794     if (dx == (PetscScalar)0.0) dx = 1.0;
2795     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2796     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2797     dx               *= epsilon;
2798     vscale_array[col] = (PetscScalar)1.0/dx;
2799   }
2800   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2801   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2802   if (ctype == IS_COLORING_GLOBAL) {
2803     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2804     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2805   }
2806   if (coloring->vscaleforrow) {
2807     vscaleforrow = coloring->vscaleforrow;
2808   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2809 
2810   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2811   /*
2812     Loop over each color
2813   */
2814   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2815   for (k=0; k<coloring->ncolors; k++) {
2816     coloring->currentcolor = k;
2817     for (i=0; i<bs; i++) {
2818       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2819       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2820       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2821       /*
2822         Loop over each column associated with color
2823         adding the perturbation to the vector w3.
2824       */
2825       for (l=0; l<coloring->ncolumns[k]; l++) {
2826         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2827         if (coloring->htype[0] == 'w') {
2828           dx = 1.0 + unorm;
2829         } else {
2830           dx = xx[col];
2831         }
2832         if (dx == (PetscScalar)0.0) dx = 1.0;
2833         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2834         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2835         dx *= epsilon;
2836         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2837         w3_array[col] += dx;
2838       }
2839       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2840       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2841 
2842       /*
2843         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2844         w2 = F(x1 + dx) - F(x1)
2845       */
2846       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2847       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2848       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2849       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2850 
2851       /*
2852         Loop over rows of vector, putting results into Jacobian matrix
2853       */
2854       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2855       for (l=0; l<coloring->nrows[k]; l++) {
2856         row = bs*coloring->rows[k][l];                /* local row index */
2857         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2858         for (j=0; j<bs; j++) {
2859           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2860           srows[j]  = row + start + j;
2861         }
2862         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2863       }
2864       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2865     }
2866   } /* endof for each color */
2867   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2868   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2869   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2870   ierr = PetscFree(srows);CHKERRQ(ierr);
2871 
2872   coloring->currentcolor = -1;
2873 
2874   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2875   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2876   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2877   PetscFunctionReturn(0);
2878 }
2879 
2880 /* -------------------------------------------------------------------*/
2881 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2882                                        MatGetRow_SeqBAIJ,
2883                                        MatRestoreRow_SeqBAIJ,
2884                                        MatMult_SeqBAIJ_N,
2885                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2886                                        MatMultTranspose_SeqBAIJ,
2887                                        MatMultTransposeAdd_SeqBAIJ,
2888                                        0,
2889                                        0,
2890                                        0,
2891                                /* 10*/ 0,
2892                                        MatLUFactor_SeqBAIJ,
2893                                        0,
2894                                        0,
2895                                        MatTranspose_SeqBAIJ,
2896                                /* 15*/ MatGetInfo_SeqBAIJ,
2897                                        MatEqual_SeqBAIJ,
2898                                        MatGetDiagonal_SeqBAIJ,
2899                                        MatDiagonalScale_SeqBAIJ,
2900                                        MatNorm_SeqBAIJ,
2901                                /* 20*/ 0,
2902                                        MatAssemblyEnd_SeqBAIJ,
2903                                        MatSetOption_SeqBAIJ,
2904                                        MatZeroEntries_SeqBAIJ,
2905                                /* 24*/ MatZeroRows_SeqBAIJ,
2906                                        0,
2907                                        0,
2908                                        0,
2909                                        0,
2910                                /* 29*/ MatSetUp_SeqBAIJ,
2911                                        0,
2912                                        0,
2913                                        0,
2914                                        0,
2915                                /* 34*/ MatDuplicate_SeqBAIJ,
2916                                        0,
2917                                        0,
2918                                        MatILUFactor_SeqBAIJ,
2919                                        0,
2920                                /* 39*/ MatAXPY_SeqBAIJ,
2921                                        MatGetSubMatrices_SeqBAIJ,
2922                                        MatIncreaseOverlap_SeqBAIJ,
2923                                        MatGetValues_SeqBAIJ,
2924                                        MatCopy_SeqBAIJ,
2925                                /* 44*/ 0,
2926                                        MatScale_SeqBAIJ,
2927                                        0,
2928                                        0,
2929                                        MatZeroRowsColumns_SeqBAIJ,
2930                                /* 49*/ 0,
2931                                        MatGetRowIJ_SeqBAIJ,
2932                                        MatRestoreRowIJ_SeqBAIJ,
2933                                        MatGetColumnIJ_SeqBAIJ,
2934                                        MatRestoreColumnIJ_SeqBAIJ,
2935                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2936                                        0,
2937                                        0,
2938                                        0,
2939                                        MatSetValuesBlocked_SeqBAIJ,
2940                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2941                                        MatDestroy_SeqBAIJ,
2942                                        MatView_SeqBAIJ,
2943                                        0,
2944                                        0,
2945                                /* 64*/ 0,
2946                                        0,
2947                                        0,
2948                                        0,
2949                                        0,
2950                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2951                                        0,
2952                                        MatConvert_Basic,
2953                                        0,
2954                                        0,
2955                                /* 74*/ 0,
2956                                        MatFDColoringApply_BAIJ,
2957                                        0,
2958                                        0,
2959                                        0,
2960                                /* 79*/ 0,
2961                                        0,
2962                                        0,
2963                                        0,
2964                                        MatLoad_SeqBAIJ,
2965                                /* 84*/ 0,
2966                                        0,
2967                                        0,
2968                                        0,
2969                                        0,
2970                                /* 89*/ 0,
2971                                        0,
2972                                        0,
2973                                        0,
2974                                        0,
2975                                /* 94*/ 0,
2976                                        0,
2977                                        0,
2978                                        0,
2979                                        0,
2980                                /* 99*/ 0,
2981                                        0,
2982                                        0,
2983                                        0,
2984                                        0,
2985                                /*104*/ 0,
2986                                        MatRealPart_SeqBAIJ,
2987                                        MatImaginaryPart_SeqBAIJ,
2988                                        0,
2989                                        0,
2990                                /*109*/ 0,
2991                                        0,
2992                                        0,
2993                                        0,
2994                                        MatMissingDiagonal_SeqBAIJ,
2995                                /*114*/ 0,
2996                                        0,
2997                                        0,
2998                                        0,
2999                                        0,
3000                                /*119*/ 0,
3001                                        0,
3002                                        MatMultHermitianTranspose_SeqBAIJ,
3003                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3004                                        0,
3005                                /*124*/ 0,
3006                                        0,
3007                                        MatInvertBlockDiagonal_SeqBAIJ,
3008                                        0,
3009                                        0,
3010                                /*129*/ 0,
3011                                        0,
3012                                        0,
3013                                        0,
3014                                        0,
3015                                /*134*/ 0,
3016                                        0,
3017                                        0,
3018                                        0,
3019                                        0,
3020                                /*139*/ 0,
3021                                        0
3022 };
3023 
3024 #undef __FUNCT__
3025 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3026 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3027 {
3028   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3029   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
3030   PetscErrorCode ierr;
3031 
3032   PetscFunctionBegin;
3033   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3034 
3035   /* allocate space for values if not already there */
3036   if (!aij->saved_values) {
3037     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3038     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3039   }
3040 
3041   /* copy values over */
3042   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3048 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3049 {
3050   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3051   PetscErrorCode ierr;
3052   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3053 
3054   PetscFunctionBegin;
3055   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3056   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3057 
3058   /* copy values over */
3059   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3060   PetscFunctionReturn(0);
3061 }
3062 
3063 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3064 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3068 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3069 {
3070   Mat_SeqBAIJ    *b;
3071   PetscErrorCode ierr;
3072   PetscInt       i,mbs,nbs,bs2;
3073   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3074 
3075   PetscFunctionBegin;
3076   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3077   if (nz == MAT_SKIP_ALLOCATION) {
3078     skipallocation = PETSC_TRUE;
3079     nz             = 0;
3080   }
3081 
3082   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3083   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3084   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3085   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3086   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3087 
3088   B->preallocated = PETSC_TRUE;
3089 
3090   mbs = B->rmap->n/bs;
3091   nbs = B->cmap->n/bs;
3092   bs2 = bs*bs;
3093 
3094   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3095 
3096   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3097   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3098   if (nnz) {
3099     for (i=0; i<mbs; i++) {
3100       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3101       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3102     }
3103   }
3104 
3105   b    = (Mat_SeqBAIJ*)B->data;
3106   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3107   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3108   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3109 
3110   if (!flg) {
3111     switch (bs) {
3112     case 1:
3113       B->ops->mult    = MatMult_SeqBAIJ_1;
3114       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3115       B->ops->sor     = MatSOR_SeqBAIJ_1;
3116       break;
3117     case 2:
3118       B->ops->mult    = MatMult_SeqBAIJ_2;
3119       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3120       B->ops->sor     = MatSOR_SeqBAIJ_2;
3121       break;
3122     case 3:
3123       B->ops->mult    = MatMult_SeqBAIJ_3;
3124       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3125       B->ops->sor     = MatSOR_SeqBAIJ_3;
3126       break;
3127     case 4:
3128       B->ops->mult    = MatMult_SeqBAIJ_4;
3129       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3130       B->ops->sor     = MatSOR_SeqBAIJ_4;
3131       break;
3132     case 5:
3133       B->ops->mult    = MatMult_SeqBAIJ_5;
3134       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3135       B->ops->sor     = MatSOR_SeqBAIJ_5;
3136       break;
3137     case 6:
3138       B->ops->mult    = MatMult_SeqBAIJ_6;
3139       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3140       B->ops->sor     = MatSOR_SeqBAIJ_6;
3141       break;
3142     case 7:
3143       B->ops->mult    = MatMult_SeqBAIJ_7;
3144       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3145       B->ops->sor     = MatSOR_SeqBAIJ_7;
3146       break;
3147     case 15:
3148       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3149       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3150       B->ops->sor     = MatSOR_SeqBAIJ_N;
3151       break;
3152     default:
3153       B->ops->mult    = MatMult_SeqBAIJ_N;
3154       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3155       B->ops->sor     = MatSOR_SeqBAIJ_N;
3156       break;
3157     }
3158   }
3159   b->mbs = mbs;
3160   b->nbs = nbs;
3161   if (!skipallocation) {
3162     if (!b->imax) {
3163       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3164       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3165 
3166       b->free_imax_ilen = PETSC_TRUE;
3167     }
3168     /* b->ilen will count nonzeros in each block row so far. */
3169     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3170     if (!nnz) {
3171       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3172       else if (nz < 0) nz = 1;
3173       for (i=0; i<mbs; i++) b->imax[i] = nz;
3174       nz = nz*mbs;
3175     } else {
3176       nz = 0;
3177       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3178     }
3179 
3180     /* allocate the matrix space */
3181     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3182     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3183     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3184     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3185     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3186 
3187     b->singlemalloc = PETSC_TRUE;
3188     b->i[0]         = 0;
3189     for (i=1; i<mbs+1; i++) {
3190       b->i[i] = b->i[i-1] + b->imax[i-1];
3191     }
3192     b->free_a  = PETSC_TRUE;
3193     b->free_ij = PETSC_TRUE;
3194   } else {
3195     b->free_a  = PETSC_FALSE;
3196     b->free_ij = PETSC_FALSE;
3197   }
3198 
3199   b->bs2              = bs2;
3200   b->mbs              = mbs;
3201   b->nz               = 0;
3202   b->maxnz            = nz;
3203   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3204   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3210 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3211 {
3212   PetscInt       i,m,nz,nz_max=0,*nnz;
3213   PetscScalar    *values=0;
3214   PetscErrorCode ierr;
3215 
3216   PetscFunctionBegin;
3217   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3218   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3219   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3220   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3221   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3222   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3223   m    = B->rmap->n/bs;
3224 
3225   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3226   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3227   for (i=0; i<m; i++) {
3228     nz = ii[i+1]- ii[i];
3229     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3230     nz_max = PetscMax(nz_max, nz);
3231     nnz[i] = nz;
3232   }
3233   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3234   ierr = PetscFree(nnz);CHKERRQ(ierr);
3235 
3236   values = (PetscScalar*)V;
3237   if (!values) {
3238     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3239     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3240   }
3241   for (i=0; i<m; i++) {
3242     PetscInt          ncols  = ii[i+1] - ii[i];
3243     const PetscInt    *icols = jj + ii[i];
3244     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3245     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3246   }
3247   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3248   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3249   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3250   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3255 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3256 #if defined(PETSC_HAVE_MUMPS)
3257 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3258 #endif
3259 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3260 
3261 /*MC
3262    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3263    block sparse compressed row format.
3264 
3265    Options Database Keys:
3266 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3267 
3268   Level: beginner
3269 
3270 .seealso: MatCreateSeqBAIJ()
3271 M*/
3272 
3273 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3274 
3275 #undef __FUNCT__
3276 #define __FUNCT__ "MatCreate_SeqBAIJ"
3277 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3278 {
3279   PetscErrorCode ierr;
3280   PetscMPIInt    size;
3281   Mat_SeqBAIJ    *b;
3282 
3283   PetscFunctionBegin;
3284   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3285   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3286 
3287   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3288   B->data = (void*)b;
3289   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3290 
3291   b->row          = 0;
3292   b->col          = 0;
3293   b->icol         = 0;
3294   b->reallocs     = 0;
3295   b->saved_values = 0;
3296 
3297   b->roworiented        = PETSC_TRUE;
3298   b->nonew              = 0;
3299   b->diag               = 0;
3300   b->solve_work         = 0;
3301   b->mult_work          = 0;
3302   B->spptr              = 0;
3303   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3304   b->keepnonzeropattern = PETSC_FALSE;
3305   b->xtoy               = 0;
3306   b->XtoY               = 0;
3307   B->same_nonzero       = PETSC_FALSE;
3308 
3309   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3310   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3311   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3312 #if defined(PETSC_HAVE_MUMPS)
3313   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3314 #endif
3315   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3316   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3317   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3318   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3319   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3320   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3321   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3322   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3323   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3324   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3325   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3326   PetscFunctionReturn(0);
3327 }
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3331 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3332 {
3333   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3334   PetscErrorCode ierr;
3335   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3336 
3337   PetscFunctionBegin;
3338   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3339 
3340   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3341     c->imax           = a->imax;
3342     c->ilen           = a->ilen;
3343     c->free_imax_ilen = PETSC_FALSE;
3344   } else {
3345     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3346     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3347     for (i=0; i<mbs; i++) {
3348       c->imax[i] = a->imax[i];
3349       c->ilen[i] = a->ilen[i];
3350     }
3351     c->free_imax_ilen = PETSC_TRUE;
3352   }
3353 
3354   /* allocate the matrix space */
3355   if (mallocmatspace) {
3356     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3357       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3358       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3359       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3360 
3361       c->i            = a->i;
3362       c->j            = a->j;
3363       c->singlemalloc = PETSC_FALSE;
3364       c->free_a       = PETSC_TRUE;
3365       c->free_ij      = PETSC_FALSE;
3366       c->parent       = A;
3367       C->preallocated = PETSC_TRUE;
3368       C->assembled    = PETSC_TRUE;
3369 
3370       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3371       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3372       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3373     } else {
3374       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3375       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3376 
3377       c->singlemalloc = PETSC_TRUE;
3378       c->free_a       = PETSC_TRUE;
3379       c->free_ij      = PETSC_TRUE;
3380 
3381       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3382       if (mbs > 0) {
3383         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3384         if (cpvalues == MAT_COPY_VALUES) {
3385           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3386         } else {
3387           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3388         }
3389       }
3390       C->preallocated = PETSC_TRUE;
3391       C->assembled    = PETSC_TRUE;
3392     }
3393   }
3394 
3395   c->roworiented = a->roworiented;
3396   c->nonew       = a->nonew;
3397 
3398   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3399   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3400 
3401   c->bs2         = a->bs2;
3402   c->mbs         = a->mbs;
3403   c->nbs         = a->nbs;
3404 
3405   if (a->diag) {
3406     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3407       c->diag      = a->diag;
3408       c->free_diag = PETSC_FALSE;
3409     } else {
3410       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3411       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3412       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3413       c->free_diag = PETSC_TRUE;
3414     }
3415   } else c->diag = 0;
3416 
3417   c->nz         = a->nz;
3418   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3419   c->solve_work = 0;
3420   c->mult_work  = 0;
3421 
3422   c->compressedrow.use   = a->compressedrow.use;
3423   c->compressedrow.nrows = a->compressedrow.nrows;
3424   c->compressedrow.check = a->compressedrow.check;
3425   if (a->compressedrow.use) {
3426     i    = a->compressedrow.nrows;
3427     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3428     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3429     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3430     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3431   } else {
3432     c->compressedrow.use    = PETSC_FALSE;
3433     c->compressedrow.i      = NULL;
3434     c->compressedrow.rindex = NULL;
3435   }
3436   C->same_nonzero = A->same_nonzero;
3437 
3438   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3439   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3440   PetscFunctionReturn(0);
3441 }
3442 
3443 #undef __FUNCT__
3444 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3445 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3446 {
3447   PetscErrorCode ierr;
3448 
3449   PetscFunctionBegin;
3450   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3451   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3452   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3453   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3454   PetscFunctionReturn(0);
3455 }
3456 
3457 #undef __FUNCT__
3458 #define __FUNCT__ "MatLoad_SeqBAIJ"
3459 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3460 {
3461   Mat_SeqBAIJ    *a;
3462   PetscErrorCode ierr;
3463   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3464   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3465   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3466   PetscInt       *masked,nmask,tmp,bs2,ishift;
3467   PetscMPIInt    size;
3468   int            fd;
3469   PetscScalar    *aa;
3470   MPI_Comm       comm;
3471 
3472   PetscFunctionBegin;
3473   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3474   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3475   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3476   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3477   bs2  = bs*bs;
3478 
3479   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3480   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3481   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3482   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3483   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3484   M = header[1]; N = header[2]; nz = header[3];
3485 
3486   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3487   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3488 
3489   /*
3490      This code adds extra rows to make sure the number of rows is
3491     divisible by the blocksize
3492   */
3493   mbs        = M/bs;
3494   extra_rows = bs - M + bs*(mbs);
3495   if (extra_rows == bs) extra_rows = 0;
3496   else mbs++;
3497   if (extra_rows) {
3498     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3499   }
3500 
3501   /* Set global sizes if not already set */
3502   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3503     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3504   } else { /* Check if the matrix global sizes are correct */
3505     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3506     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3507       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3508     }
3509     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3510   }
3511 
3512   /* read in row lengths */
3513   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3514   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3515   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3516 
3517   /* read in column indices */
3518   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3519   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3520   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3521 
3522   /* loop over row lengths determining block row lengths */
3523   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3524   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3525   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3526   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3527   rowcount = 0;
3528   nzcount  = 0;
3529   for (i=0; i<mbs; i++) {
3530     nmask = 0;
3531     for (j=0; j<bs; j++) {
3532       kmax = rowlengths[rowcount];
3533       for (k=0; k<kmax; k++) {
3534         tmp = jj[nzcount++]/bs;
3535         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3536       }
3537       rowcount++;
3538     }
3539     browlengths[i] += nmask;
3540     /* zero out the mask elements we set */
3541     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3542   }
3543 
3544   /* Do preallocation  */
3545   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3546   a    = (Mat_SeqBAIJ*)newmat->data;
3547 
3548   /* set matrix "i" values */
3549   a->i[0] = 0;
3550   for (i=1; i<= mbs; i++) {
3551     a->i[i]      = a->i[i-1] + browlengths[i-1];
3552     a->ilen[i-1] = browlengths[i-1];
3553   }
3554   a->nz = 0;
3555   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3556 
3557   /* read in nonzero values */
3558   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3559   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3560   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3561 
3562   /* set "a" and "j" values into matrix */
3563   nzcount = 0; jcount = 0;
3564   for (i=0; i<mbs; i++) {
3565     nzcountb = nzcount;
3566     nmask    = 0;
3567     for (j=0; j<bs; j++) {
3568       kmax = rowlengths[i*bs+j];
3569       for (k=0; k<kmax; k++) {
3570         tmp = jj[nzcount++]/bs;
3571         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3572       }
3573     }
3574     /* sort the masked values */
3575     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3576 
3577     /* set "j" values into matrix */
3578     maskcount = 1;
3579     for (j=0; j<nmask; j++) {
3580       a->j[jcount++]  = masked[j];
3581       mask[masked[j]] = maskcount++;
3582     }
3583     /* set "a" values into matrix */
3584     ishift = bs2*a->i[i];
3585     for (j=0; j<bs; j++) {
3586       kmax = rowlengths[i*bs+j];
3587       for (k=0; k<kmax; k++) {
3588         tmp       = jj[nzcountb]/bs;
3589         block     = mask[tmp] - 1;
3590         point     = jj[nzcountb] - bs*tmp;
3591         idx       = ishift + bs2*block + j + bs*point;
3592         a->a[idx] = (MatScalar)aa[nzcountb++];
3593       }
3594     }
3595     /* zero out the mask elements we set */
3596     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3597   }
3598   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3599 
3600   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3601   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3602   ierr = PetscFree(aa);CHKERRQ(ierr);
3603   ierr = PetscFree(jj);CHKERRQ(ierr);
3604   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3605 
3606   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3608   PetscFunctionReturn(0);
3609 }
3610 
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatCreateSeqBAIJ"
3613 /*@C
3614    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3615    compressed row) format.  For good matrix assembly performance the
3616    user should preallocate the matrix storage by setting the parameter nz
3617    (or the array nnz).  By setting these parameters accurately, performance
3618    during matrix assembly can be increased by more than a factor of 50.
3619 
3620    Collective on MPI_Comm
3621 
3622    Input Parameters:
3623 +  comm - MPI communicator, set to PETSC_COMM_SELF
3624 .  bs - size of block
3625 .  m - number of rows
3626 .  n - number of columns
3627 .  nz - number of nonzero blocks  per block row (same for all rows)
3628 -  nnz - array containing the number of nonzero blocks in the various block rows
3629          (possibly different for each block row) or NULL
3630 
3631    Output Parameter:
3632 .  A - the matrix
3633 
3634    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3635    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3636    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3637 
3638    Options Database Keys:
3639 .   -mat_no_unroll - uses code that does not unroll the loops in the
3640                      block calculations (much slower)
3641 .    -mat_block_size - size of the blocks to use
3642 
3643    Level: intermediate
3644 
3645    Notes:
3646    The number of rows and columns must be divisible by blocksize.
3647 
3648    If the nnz parameter is given then the nz parameter is ignored
3649 
3650    A nonzero block is any block that as 1 or more nonzeros in it
3651 
3652    The block AIJ format is fully compatible with standard Fortran 77
3653    storage.  That is, the stored row and column indices can begin at
3654    either one (as in Fortran) or zero.  See the users' manual for details.
3655 
3656    Specify the preallocated storage with either nz or nnz (not both).
3657    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3658    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3659    matrices.
3660 
3661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3662 @*/
3663 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3664 {
3665   PetscErrorCode ierr;
3666 
3667   PetscFunctionBegin;
3668   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3669   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3670   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3671   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3672   PetscFunctionReturn(0);
3673 }
3674 
3675 #undef __FUNCT__
3676 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3677 /*@C
3678    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3679    per row in the matrix. For good matrix assembly performance the
3680    user should preallocate the matrix storage by setting the parameter nz
3681    (or the array nnz).  By setting these parameters accurately, performance
3682    during matrix assembly can be increased by more than a factor of 50.
3683 
3684    Collective on MPI_Comm
3685 
3686    Input Parameters:
3687 +  A - the matrix
3688 .  bs - size of block
3689 .  nz - number of block nonzeros per block row (same for all rows)
3690 -  nnz - array containing the number of block nonzeros in the various block rows
3691          (possibly different for each block row) or NULL
3692 
3693    Options Database Keys:
3694 .   -mat_no_unroll - uses code that does not unroll the loops in the
3695                      block calculations (much slower)
3696 .    -mat_block_size - size of the blocks to use
3697 
3698    Level: intermediate
3699 
3700    Notes:
3701    If the nnz parameter is given then the nz parameter is ignored
3702 
3703    You can call MatGetInfo() to get information on how effective the preallocation was;
3704    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3705    You can also run with the option -info and look for messages with the string
3706    malloc in them to see if additional memory allocation was needed.
3707 
3708    The block AIJ format is fully compatible with standard Fortran 77
3709    storage.  That is, the stored row and column indices can begin at
3710    either one (as in Fortran) or zero.  See the users' manual for details.
3711 
3712    Specify the preallocated storage with either nz or nnz (not both).
3713    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3714    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3715 
3716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3717 @*/
3718 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3719 {
3720   PetscErrorCode ierr;
3721 
3722   PetscFunctionBegin;
3723   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3724   PetscValidType(B,1);
3725   PetscValidLogicalCollectiveInt(B,bs,2);
3726   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3727   PetscFunctionReturn(0);
3728 }
3729 
3730 #undef __FUNCT__
3731 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3732 /*@C
3733    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3734    (the default sequential PETSc format).
3735 
3736    Collective on MPI_Comm
3737 
3738    Input Parameters:
3739 +  A - the matrix
3740 .  i - the indices into j for the start of each local row (starts with zero)
3741 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3742 -  v - optional values in the matrix
3743 
3744    Level: developer
3745 
3746 .keywords: matrix, aij, compressed row, sparse
3747 
3748 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3749 @*/
3750 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3751 {
3752   PetscErrorCode ierr;
3753 
3754   PetscFunctionBegin;
3755   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3756   PetscValidType(B,1);
3757   PetscValidLogicalCollectiveInt(B,bs,2);
3758   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3759   PetscFunctionReturn(0);
3760 }
3761 
3762 
3763 #undef __FUNCT__
3764 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3765 /*@
3766      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3767 
3768      Collective on MPI_Comm
3769 
3770    Input Parameters:
3771 +  comm - must be an MPI communicator of size 1
3772 .  bs - size of block
3773 .  m - number of rows
3774 .  n - number of columns
3775 .  i - row indices
3776 .  j - column indices
3777 -  a - matrix values
3778 
3779    Output Parameter:
3780 .  mat - the matrix
3781 
3782    Level: advanced
3783 
3784    Notes:
3785        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3786     once the matrix is destroyed
3787 
3788        You cannot set new nonzero locations into this matrix, that will generate an error.
3789 
3790        The i and j indices are 0 based
3791 
3792        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3793 
3794 
3795 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3796 
3797 @*/
3798 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3799 {
3800   PetscErrorCode ierr;
3801   PetscInt       ii;
3802   Mat_SeqBAIJ    *baij;
3803 
3804   PetscFunctionBegin;
3805   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3806   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3807 
3808   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3809   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3810   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3811   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3812   baij = (Mat_SeqBAIJ*)(*mat)->data;
3813   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3814   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3815 
3816   baij->i = i;
3817   baij->j = j;
3818   baij->a = a;
3819 
3820   baij->singlemalloc = PETSC_FALSE;
3821   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3822   baij->free_a       = PETSC_FALSE;
3823   baij->free_ij      = PETSC_FALSE;
3824 
3825   for (ii=0; ii<m; ii++) {
3826     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3827 #if defined(PETSC_USE_DEBUG)
3828     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3829 #endif
3830   }
3831 #if defined(PETSC_USE_DEBUG)
3832   for (ii=0; ii<baij->i[m]; ii++) {
3833     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3834     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3835   }
3836 #endif
3837 
3838   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3839   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3840   PetscFunctionReturn(0);
3841 }
3842