xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7a3afff6730f82e9315b80da4b583bc441ed2b08)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       *tia, *tja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (ja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353 	    }
1354           }
1355         }
1356       }
1357     }
1358 
1359     n     *= bs;
1360     nz *= bs*bs;
1361     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1362       ierr = PetscFree(tia);CHKERRQ(ierr);
1363       ierr = PetscFree(tja);CHKERRQ(ierr);
1364     }
1365   } else if (oshift == 1) {
1366     if (symmetric) {
1367       PetscInt nz = tia[A->rmap->n/bs];
1368       /*  add 1 to i and j indices */
1369       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1370       *ia = tia;
1371       if (ja) {
1372 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1373         *ja = tja;
1374       }
1375     } else {
1376       PetscInt nz = a->i[A->rmap->n/bs];
1377       /* malloc space and  add 1 to i and j indices */
1378       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1379       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1380       if (ja) {
1381 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1382 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1383       }
1384     }
1385   } else {
1386     *ia = tia;
1387     if (ja) *ja = tja;
1388   }
1389 
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1395 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (!ia) PetscFunctionReturn(0);
1401   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1402     ierr = PetscFree(*ia);CHKERRQ(ierr);
1403     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1410 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1411 {
1412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416 #if defined(PETSC_USE_LOG)
1417   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1418 #endif
1419   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1420   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1421   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1422   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1423   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1424   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1425   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1426   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1427   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1428   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1429   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1430   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1431   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1432 
1433   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1434   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1435   ierr = PetscFree(A->data);CHKERRQ(ierr);
1436 
1437   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1445   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1446   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1447   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1453 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1454 {
1455   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   switch (op) {
1460   case MAT_ROW_ORIENTED:
1461     a->roworiented    = flg;
1462     break;
1463   case MAT_KEEP_NONZERO_PATTERN:
1464     a->keepnonzeropattern = flg;
1465     break;
1466   case MAT_NEW_NONZERO_LOCATIONS:
1467     a->nonew          = (flg ? 0 : 1);
1468     break;
1469   case MAT_NEW_NONZERO_LOCATION_ERR:
1470     a->nonew          = (flg ? -1 : 0);
1471     break;
1472   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473     a->nonew          = (flg ? -2 : 0);
1474     break;
1475   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476     a->nounused       = (flg ? -1 : 0);
1477     break;
1478   case MAT_CHECK_COMPRESSED_ROW:
1479     a->compressedrow.check = flg;
1480     break;
1481   case MAT_NEW_DIAGONALS:
1482   case MAT_IGNORE_OFF_PROC_ENTRIES:
1483   case MAT_USE_HASH_TABLE:
1484     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1485     break;
1486   case MAT_SPD:
1487   case MAT_SYMMETRIC:
1488   case MAT_STRUCTURALLY_SYMMETRIC:
1489   case MAT_HERMITIAN:
1490   case MAT_SYMMETRY_ETERNAL:
1491     /* These options are handled directly by MatSetOption() */
1492     break;
1493   default:
1494     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1495   }
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1501 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1502 {
1503   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1504   PetscErrorCode ierr;
1505   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1506   MatScalar      *aa,*aa_i;
1507   PetscScalar    *v_i;
1508 
1509   PetscFunctionBegin;
1510   bs  = A->rmap->bs;
1511   ai  = a->i;
1512   aj  = a->j;
1513   aa  = a->a;
1514   bs2 = a->bs2;
1515 
1516   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1517 
1518   bn  = row/bs;   /* Block number */
1519   bp  = row % bs; /* Block Position */
1520   M   = ai[bn+1] - ai[bn];
1521   *nz = bs*M;
1522 
1523   if (v) {
1524     *v = 0;
1525     if (*nz) {
1526       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1527       for (i=0; i<M; i++) { /* for each block in the block row */
1528         v_i  = *v + i*bs;
1529         aa_i = aa + bs2*(ai[bn] + i);
1530         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1531       }
1532     }
1533   }
1534 
1535   if (idx) {
1536     *idx = 0;
1537     if (*nz) {
1538       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1539       for (i=0; i<M; i++) { /* for each block in the block row */
1540         idx_i = *idx + i*bs;
1541         itmp  = bs*aj[ai[bn] + i];
1542         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1543       }
1544     }
1545   }
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1551 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1552 {
1553   PetscErrorCode ierr;
1554 
1555   PetscFunctionBegin;
1556   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1557   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1565 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1566 {
1567   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1568   Mat            C;
1569   PetscErrorCode ierr;
1570   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1571   PetscInt       *rows,*cols,bs2=a->bs2;
1572   MatScalar      *array;
1573 
1574   PetscFunctionBegin;
1575   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1576   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1577     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1578     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1579 
1580     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1581     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1582     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1583     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1584     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1585     ierr = PetscFree(col);CHKERRQ(ierr);
1586   } else {
1587     C = *B;
1588   }
1589 
1590   array = a->a;
1591   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1592   for (i=0; i<mbs; i++) {
1593     cols[0] = i*bs;
1594     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1595     len = ai[i+1] - ai[i];
1596     for (j=0; j<len; j++) {
1597       rows[0] = (*aj++)*bs;
1598       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1599       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1600       array += bs2;
1601     }
1602   }
1603   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1604 
1605   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607 
1608   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1609     *B = C;
1610   } else {
1611     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1612   }
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 EXTERN_C_BEGIN
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1619 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1620 {
1621   PetscErrorCode ierr;
1622   Mat            Btrans;
1623 
1624   PetscFunctionBegin;
1625   *f = PETSC_FALSE;
1626   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1627   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1628   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1629   PetscFunctionReturn(0);
1630 }
1631 EXTERN_C_END
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1635 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1636 {
1637   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1638   PetscErrorCode ierr;
1639   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1640   int            fd;
1641   PetscScalar    *aa;
1642   FILE           *file;
1643 
1644   PetscFunctionBegin;
1645   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1646   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1647   col_lens[0] = MAT_FILE_CLASSID;
1648 
1649   col_lens[1] = A->rmap->N;
1650   col_lens[2] = A->cmap->n;
1651   col_lens[3] = a->nz*bs2;
1652 
1653   /* store lengths of each row and write (including header) to file */
1654   count = 0;
1655   for (i=0; i<a->mbs; i++) {
1656     for (j=0; j<bs; j++) {
1657       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1658     }
1659   }
1660   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1661   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1662 
1663   /* store column indices (zero start index) */
1664   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1665   count = 0;
1666   for (i=0; i<a->mbs; i++) {
1667     for (j=0; j<bs; j++) {
1668       for (k=a->i[i]; k<a->i[i+1]; k++) {
1669         for (l=0; l<bs; l++) {
1670           jj[count++] = bs*a->j[k] + l;
1671         }
1672       }
1673     }
1674   }
1675   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1676   ierr = PetscFree(jj);CHKERRQ(ierr);
1677 
1678   /* store nonzero values */
1679   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1680   count = 0;
1681   for (i=0; i<a->mbs; i++) {
1682     for (j=0; j<bs; j++) {
1683       for (k=a->i[i]; k<a->i[i+1]; k++) {
1684         for (l=0; l<bs; l++) {
1685           aa[count++] = a->a[bs2*k + l*bs + j];
1686         }
1687       }
1688     }
1689   }
1690   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1691   ierr = PetscFree(aa);CHKERRQ(ierr);
1692 
1693   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1694   if (file) {
1695     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1702 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1703 {
1704   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1705   PetscErrorCode    ierr;
1706   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1707   PetscViewerFormat format;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1711   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1712     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1713   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1714     Mat aij;
1715     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1716     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1717     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1718   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1719      PetscFunctionReturn(0);
1720   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1721     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1722     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1723     for (i=0; i<a->mbs; i++) {
1724       for (j=0; j<bs; j++) {
1725         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1726         for (k=a->i[i]; k<a->i[i+1]; k++) {
1727           for (l=0; l<bs; l++) {
1728 #if defined(PETSC_USE_COMPLEX)
1729             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1731                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1734                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1735             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1736               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1737             }
1738 #else
1739             if (a->a[bs2*k + l*bs + j] != 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1741             }
1742 #endif
1743           }
1744         }
1745         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1746       }
1747     }
1748     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1749   } else {
1750     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1751     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1752     for (i=0; i<a->mbs; i++) {
1753       for (j=0; j<bs; j++) {
1754         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1755         for (k=a->i[i]; k<a->i[i+1]; k++) {
1756           for (l=0; l<bs; l++) {
1757 #if defined(PETSC_USE_COMPLEX)
1758             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1760                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1762               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1763                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1764             } else {
1765               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1766             }
1767 #else
1768             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1769 #endif
1770           }
1771         }
1772         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1773       }
1774     }
1775     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1776   }
1777   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1783 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1784 {
1785   Mat            A = (Mat) Aa;
1786   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1787   PetscErrorCode ierr;
1788   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1789   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1790   MatScalar      *aa;
1791   PetscViewer    viewer;
1792   PetscViewerFormat format;
1793 
1794   PetscFunctionBegin;
1795   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1796   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1797 
1798   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1799 
1800   /* loop over matrix elements drawing boxes */
1801 
1802   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1803     color = PETSC_DRAW_BLUE;
1804     for (i=0,row=0; i<mbs; i++,row+=bs) {
1805       for (j=a->i[i]; j<a->i[i+1]; j++) {
1806         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1807         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1808         aa = a->a + j*bs2;
1809         for (k=0; k<bs; k++) {
1810           for (l=0; l<bs; l++) {
1811             if (PetscRealPart(*aa++) >=  0.) continue;
1812             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1813           }
1814         }
1815       }
1816     }
1817     color = PETSC_DRAW_CYAN;
1818     for (i=0,row=0; i<mbs; i++,row+=bs) {
1819       for (j=a->i[i]; j<a->i[i+1]; j++) {
1820         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1821         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1822         aa = a->a + j*bs2;
1823         for (k=0; k<bs; k++) {
1824           for (l=0; l<bs; l++) {
1825             if (PetscRealPart(*aa++) != 0.) continue;
1826             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1827           }
1828         }
1829       }
1830     }
1831     color = PETSC_DRAW_RED;
1832     for (i=0,row=0; i<mbs; i++,row+=bs) {
1833       for (j=a->i[i]; j<a->i[i+1]; j++) {
1834         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1835         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1836         aa = a->a + j*bs2;
1837         for (k=0; k<bs; k++) {
1838           for (l=0; l<bs; l++) {
1839             if (PetscRealPart(*aa++) <= 0.) continue;
1840             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1841           }
1842         }
1843       }
1844     }
1845   } else {
1846     /* use contour shading to indicate magnitude of values */
1847     /* first determine max of all nonzero values */
1848     PetscDraw   popup;
1849     PetscReal scale,maxv = 0.0;
1850 
1851     for (i=0; i<a->nz*a->bs2; i++) {
1852       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1853     }
1854     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1855     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1856     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1857     for (i=0,row=0; i<mbs; i++,row+=bs) {
1858       for (j=a->i[i]; j<a->i[i+1]; j++) {
1859         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1860         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1861         aa = a->a + j*bs2;
1862         for (k=0; k<bs; k++) {
1863           for (l=0; l<bs; l++) {
1864             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1865             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1866           }
1867         }
1868       }
1869     }
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1876 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1877 {
1878   PetscErrorCode ierr;
1879   PetscReal      xl,yl,xr,yr,w,h;
1880   PetscDraw      draw;
1881   PetscBool      isnull;
1882 
1883   PetscFunctionBegin;
1884 
1885   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1886   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1887 
1888   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1889   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1890   xr += w;    yr += h;  xl = -w;     yl = -h;
1891   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1892   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1893   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatView_SeqBAIJ"
1899 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1900 {
1901   PetscErrorCode ierr;
1902   PetscBool      iascii,isbinary,isdraw;
1903 
1904   PetscFunctionBegin;
1905   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1906   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1907   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1908   if (iascii){
1909     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1910   } else if (isbinary) {
1911     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1912   } else if (isdraw) {
1913     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1914   } else {
1915     Mat B;
1916     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1917     ierr = MatView(B,viewer);CHKERRQ(ierr);
1918     ierr = MatDestroy(&B);CHKERRQ(ierr);
1919   }
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1926 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1927 {
1928   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1929   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1930   PetscInt    *ai = a->i,*ailen = a->ilen;
1931   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1932   MatScalar   *ap,*aa = a->a;
1933 
1934   PetscFunctionBegin;
1935   for (k=0; k<m; k++) { /* loop over rows */
1936     row  = im[k]; brow = row/bs;
1937     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1938     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1939     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1940     nrow = ailen[brow];
1941     for (l=0; l<n; l++) { /* loop over columns */
1942       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1943       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1944       col  = in[l] ;
1945       bcol = col/bs;
1946       cidx = col%bs;
1947       ridx = row%bs;
1948       high = nrow;
1949       low  = 0; /* assume unsorted */
1950       while (high-low > 5) {
1951         t = (low+high)/2;
1952         if (rp[t] > bcol) high = t;
1953         else             low  = t;
1954       }
1955       for (i=low; i<high; i++) {
1956         if (rp[i] > bcol) break;
1957         if (rp[i] == bcol) {
1958           *v++ = ap[bs2*i+bs*cidx+ridx];
1959           goto finished;
1960         }
1961       }
1962       *v++ = 0.0;
1963       finished:;
1964     }
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1971 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1972 {
1973   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1974   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1975   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1976   PetscErrorCode    ierr;
1977   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1978   PetscBool         roworiented=a->roworiented;
1979   const PetscScalar *value = v;
1980   MatScalar         *ap,*aa = a->a,*bap;
1981 
1982   PetscFunctionBegin;
1983   if (roworiented) {
1984     stepval = (n-1)*bs;
1985   } else {
1986     stepval = (m-1)*bs;
1987   }
1988   for (k=0; k<m; k++) { /* loop over added rows */
1989     row  = im[k];
1990     if (row < 0) continue;
1991 #if defined(PETSC_USE_DEBUG)
1992     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1993 #endif
1994     rp   = aj + ai[row];
1995     ap   = aa + bs2*ai[row];
1996     rmax = imax[row];
1997     nrow = ailen[row];
1998     low  = 0;
1999     high = nrow;
2000     for (l=0; l<n; l++) { /* loop over added columns */
2001       if (in[l] < 0) continue;
2002 #if defined(PETSC_USE_DEBUG)
2003       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2004 #endif
2005       col = in[l];
2006       if (roworiented) {
2007         value = v + (k*(stepval+bs) + l)*bs;
2008       } else {
2009         value = v + (l*(stepval+bs) + k)*bs;
2010       }
2011       if (col <= lastcol) low = 0; else high = nrow;
2012       lastcol = col;
2013       while (high-low > 7) {
2014         t = (low+high)/2;
2015         if (rp[t] > col) high = t;
2016         else             low  = t;
2017       }
2018       for (i=low; i<high; i++) {
2019         if (rp[i] > col) break;
2020         if (rp[i] == col) {
2021           bap  = ap +  bs2*i;
2022           if (roworiented) {
2023             if (is == ADD_VALUES) {
2024               for (ii=0; ii<bs; ii++,value+=stepval) {
2025                 for (jj=ii; jj<bs2; jj+=bs) {
2026                   bap[jj] += *value++;
2027                 }
2028               }
2029             } else {
2030               for (ii=0; ii<bs; ii++,value+=stepval) {
2031                 for (jj=ii; jj<bs2; jj+=bs) {
2032                   bap[jj] = *value++;
2033                 }
2034               }
2035             }
2036           } else {
2037             if (is == ADD_VALUES) {
2038               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2039                 for (jj=0; jj<bs; jj++) {
2040                   bap[jj] += value[jj];
2041                 }
2042                 bap += bs;
2043               }
2044             } else {
2045               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2046                 for (jj=0; jj<bs; jj++) {
2047                   bap[jj]  = value[jj];
2048                 }
2049                 bap += bs;
2050               }
2051             }
2052           }
2053           goto noinsert2;
2054         }
2055       }
2056       if (nonew == 1) goto noinsert2;
2057       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2058       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2059       N = nrow++ - 1; high++;
2060       /* shift up all the later entries in this row */
2061       for (ii=N; ii>=i; ii--) {
2062         rp[ii+1] = rp[ii];
2063         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2064       }
2065       if (N >= i) {
2066         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2067       }
2068       rp[i] = col;
2069       bap   = ap +  bs2*i;
2070       if (roworiented) {
2071         for (ii=0; ii<bs; ii++,value+=stepval) {
2072           for (jj=ii; jj<bs2; jj+=bs) {
2073             bap[jj] = *value++;
2074           }
2075         }
2076       } else {
2077         for (ii=0; ii<bs; ii++,value+=stepval) {
2078           for (jj=0; jj<bs; jj++) {
2079             *bap++  = *value++;
2080           }
2081         }
2082       }
2083       noinsert2:;
2084       low = i;
2085     }
2086     ailen[row] = nrow;
2087   }
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2093 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2094 {
2095   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2096   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2097   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2098   PetscErrorCode ierr;
2099   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2100   MatScalar      *aa = a->a,*ap;
2101   PetscReal      ratio=0.6;
2102 
2103   PetscFunctionBegin;
2104   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2105 
2106   if (m) rmax = ailen[0];
2107   for (i=1; i<mbs; i++) {
2108     /* move each row back by the amount of empty slots (fshift) before it*/
2109     fshift += imax[i-1] - ailen[i-1];
2110     rmax   = PetscMax(rmax,ailen[i]);
2111     if (fshift) {
2112       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2113       N = ailen[i];
2114       for (j=0; j<N; j++) {
2115         ip[j-fshift] = ip[j];
2116         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2117       }
2118     }
2119     ai[i] = ai[i-1] + ailen[i-1];
2120   }
2121   if (mbs) {
2122     fshift += imax[mbs-1] - ailen[mbs-1];
2123     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2124   }
2125   /* reset ilen and imax for each row */
2126   for (i=0; i<mbs; i++) {
2127     ailen[i] = imax[i] = ai[i+1] - ai[i];
2128   }
2129   a->nz = ai[mbs];
2130 
2131   /* diagonals may have moved, so kill the diagonal pointers */
2132   a->idiagvalid = PETSC_FALSE;
2133   if (fshift && a->diag) {
2134     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2135     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2136     a->diag = 0;
2137   }
2138   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2139   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2140   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2141   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2142   A->info.mallocs     += a->reallocs;
2143   a->reallocs          = 0;
2144   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2145 
2146   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2147   A->same_nonzero = PETSC_TRUE;
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 /*
2152    This function returns an array of flags which indicate the locations of contiguous
2153    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2154    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2155    Assume: sizes should be long enough to hold all the values.
2156 */
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2159 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2160 {
2161   PetscInt   i,j,k,row;
2162   PetscBool  flg;
2163 
2164   PetscFunctionBegin;
2165   for (i=0,j=0; i<n; j++) {
2166     row = idx[i];
2167     if (row%bs!=0) { /* Not the begining of a block */
2168       sizes[j] = 1;
2169       i++;
2170     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2171       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2172       i++;
2173     } else { /* Begining of the block, so check if the complete block exists */
2174       flg = PETSC_TRUE;
2175       for (k=1; k<bs; k++) {
2176         if (row+k != idx[i+k]) { /* break in the block */
2177           flg = PETSC_FALSE;
2178           break;
2179         }
2180       }
2181       if (flg) { /* No break in the bs */
2182         sizes[j] = bs;
2183         i+= bs;
2184       } else {
2185         sizes[j] = 1;
2186         i++;
2187       }
2188     }
2189   }
2190   *bs_max = j;
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2196 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2197 {
2198   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2199   PetscErrorCode    ierr;
2200   PetscInt          i,j,k,count,*rows;
2201   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2202   PetscScalar       zero = 0.0;
2203   MatScalar         *aa;
2204   const PetscScalar *xx;
2205   PetscScalar       *bb;
2206 
2207   PetscFunctionBegin;
2208   /* fix right hand side if needed */
2209   if (x && b) {
2210     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2211     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2212     for (i=0; i<is_n; i++) {
2213       bb[is_idx[i]] = diag*xx[is_idx[i]];
2214     }
2215     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2216     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2217   }
2218 
2219   /* Make a copy of the IS and  sort it */
2220   /* allocate memory for rows,sizes */
2221   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2222 
2223   /* copy IS values to rows, and sort them */
2224   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2225   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2226 
2227   if (baij->keepnonzeropattern) {
2228     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2229     bs_max = is_n;
2230     A->same_nonzero = PETSC_TRUE;
2231   } else {
2232     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2233     A->same_nonzero = PETSC_FALSE;
2234   }
2235 
2236   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2237     row   = rows[j];
2238     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2239     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2240     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2241     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2242       if (diag != (PetscScalar)0.0) {
2243         if (baij->ilen[row/bs] > 0) {
2244           baij->ilen[row/bs]       = 1;
2245           baij->j[baij->i[row/bs]] = row/bs;
2246           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2247         }
2248         /* Now insert all the diagonal values for this bs */
2249         for (k=0; k<bs; k++) {
2250           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2251         }
2252       } else { /* (diag == 0.0) */
2253         baij->ilen[row/bs] = 0;
2254       } /* end (diag == 0.0) */
2255     } else { /* (sizes[i] != bs) */
2256 #if defined (PETSC_USE_DEBUG)
2257       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2258 #endif
2259       for (k=0; k<count; k++) {
2260         aa[0] =  zero;
2261         aa    += bs;
2262       }
2263       if (diag != (PetscScalar)0.0) {
2264         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2265       }
2266     }
2267   }
2268 
2269   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2270   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2276 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2277 {
2278   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2279   PetscErrorCode    ierr;
2280   PetscInt          i,j,k,count;
2281   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2282   PetscScalar       zero = 0.0;
2283   MatScalar         *aa;
2284   const PetscScalar *xx;
2285   PetscScalar       *bb;
2286   PetscBool         *zeroed,vecs = PETSC_FALSE;
2287 
2288   PetscFunctionBegin;
2289   /* fix right hand side if needed */
2290   if (x && b) {
2291     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2292     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2293     vecs = PETSC_TRUE;
2294   }
2295   A->same_nonzero = PETSC_TRUE;
2296 
2297   /* zero the columns */
2298   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2299   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2300   for (i=0; i<is_n; i++) {
2301     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2302     zeroed[is_idx[i]] = PETSC_TRUE;
2303   }
2304   for (i=0; i<A->rmap->N; i++) {
2305     if (!zeroed[i]) {
2306       row = i/bs;
2307       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2308         for (k=0; k<bs; k++) {
2309           col = bs*baij->j[j] + k;
2310 	  if (zeroed[col]) {
2311 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2312             if (vecs) bb[i] -= aa[0]*xx[col];
2313             aa[0] = 0.0;
2314           }
2315         }
2316       }
2317     } else if (vecs) bb[i] = diag*xx[i];
2318   }
2319   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2320   if (vecs) {
2321     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2322     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2323   }
2324 
2325   /* zero the rows */
2326   for (i=0; i<is_n; i++) {
2327     row   = is_idx[i];
2328     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2329     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2330     for (k=0; k<count; k++) {
2331       aa[0] =  zero;
2332       aa    += bs;
2333     }
2334     if (diag != (PetscScalar)0.0) {
2335       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2336     }
2337   }
2338   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 #undef __FUNCT__
2343 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2344 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2345 {
2346   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2347   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2348   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2349   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2350   PetscErrorCode ierr;
2351   PetscInt       ridx,cidx,bs2=a->bs2;
2352   PetscBool      roworiented=a->roworiented;
2353   MatScalar      *ap,value,*aa=a->a,*bap;
2354 
2355   PetscFunctionBegin;
2356   if (v) PetscValidScalarPointer(v,6);
2357   for (k=0; k<m; k++) { /* loop over added rows */
2358     row  = im[k];
2359     brow = row/bs;
2360     if (row < 0) continue;
2361 #if defined(PETSC_USE_DEBUG)
2362     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2363 #endif
2364     rp   = aj + ai[brow];
2365     ap   = aa + bs2*ai[brow];
2366     rmax = imax[brow];
2367     nrow = ailen[brow];
2368     low  = 0;
2369     high = nrow;
2370     for (l=0; l<n; l++) { /* loop over added columns */
2371       if (in[l] < 0) continue;
2372 #if defined(PETSC_USE_DEBUG)
2373       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2374 #endif
2375       col = in[l]; bcol = col/bs;
2376       ridx = row % bs; cidx = col % bs;
2377       if (roworiented) {
2378         value = v[l + k*n];
2379       } else {
2380         value = v[k + l*m];
2381       }
2382       if (col <= lastcol) low = 0; else high = nrow;
2383       lastcol = col;
2384       while (high-low > 7) {
2385         t = (low+high)/2;
2386         if (rp[t] > bcol) high = t;
2387         else              low  = t;
2388       }
2389       for (i=low; i<high; i++) {
2390         if (rp[i] > bcol) break;
2391         if (rp[i] == bcol) {
2392           bap  = ap +  bs2*i + bs*cidx + ridx;
2393           if (is == ADD_VALUES) *bap += value;
2394           else                  *bap  = value;
2395           goto noinsert1;
2396         }
2397       }
2398       if (nonew == 1) goto noinsert1;
2399       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2400       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2401       N = nrow++ - 1; high++;
2402       /* shift up all the later entries in this row */
2403       for (ii=N; ii>=i; ii--) {
2404         rp[ii+1] = rp[ii];
2405         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2406       }
2407       if (N>=i) {
2408         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2409       }
2410       rp[i]                      = bcol;
2411       ap[bs2*i + bs*cidx + ridx] = value;
2412       a->nz++;
2413       noinsert1:;
2414       low = i;
2415     }
2416     ailen[brow] = nrow;
2417   }
2418   A->same_nonzero = PETSC_FALSE;
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 #undef __FUNCT__
2423 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2424 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2425 {
2426   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2427   Mat            outA;
2428   PetscErrorCode ierr;
2429   PetscBool      row_identity,col_identity;
2430 
2431   PetscFunctionBegin;
2432   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2433   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2434   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2435   if (!row_identity || !col_identity) {
2436     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2437   }
2438 
2439   outA            = inA;
2440   inA->factortype = MAT_FACTOR_LU;
2441 
2442   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2443 
2444   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2445   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2446   a->row = row;
2447   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2448   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2449   a->col = col;
2450 
2451   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2452   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2453    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2454   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2455 
2456   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2457   if (!a->solve_work) {
2458     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2459     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2460   }
2461   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2462 
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 EXTERN_C_BEGIN
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2469 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2470 {
2471   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2472   PetscInt    i,nz,mbs;
2473 
2474   PetscFunctionBegin;
2475   nz  = baij->maxnz;
2476   mbs = baij->mbs;
2477   for (i=0; i<nz; i++) {
2478     baij->j[i] = indices[i];
2479   }
2480   baij->nz = nz;
2481   for (i=0; i<mbs; i++) {
2482     baij->ilen[i] = baij->imax[i];
2483   }
2484   PetscFunctionReturn(0);
2485 }
2486 EXTERN_C_END
2487 
2488 #undef __FUNCT__
2489 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2490 /*@
2491     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2492        in the matrix.
2493 
2494   Input Parameters:
2495 +  mat - the SeqBAIJ matrix
2496 -  indices - the column indices
2497 
2498   Level: advanced
2499 
2500   Notes:
2501     This can be called if you have precomputed the nonzero structure of the
2502   matrix and want to provide it to the matrix object to improve the performance
2503   of the MatSetValues() operation.
2504 
2505     You MUST have set the correct numbers of nonzeros per row in the call to
2506   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2507 
2508     MUST be called before any calls to MatSetValues();
2509 
2510 @*/
2511 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2512 {
2513   PetscErrorCode ierr;
2514 
2515   PetscFunctionBegin;
2516   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2517   PetscValidPointer(indices,2);
2518   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2524 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2525 {
2526   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2527   PetscErrorCode ierr;
2528   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2529   PetscReal      atmp;
2530   PetscScalar    *x,zero = 0.0;
2531   MatScalar      *aa;
2532   PetscInt       ncols,brow,krow,kcol;
2533 
2534   PetscFunctionBegin;
2535   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2536   bs   = A->rmap->bs;
2537   aa   = a->a;
2538   ai   = a->i;
2539   aj   = a->j;
2540   mbs  = a->mbs;
2541 
2542   ierr = VecSet(v,zero);CHKERRQ(ierr);
2543   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2544   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2545   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2546   for (i=0; i<mbs; i++) {
2547     ncols = ai[1] - ai[0]; ai++;
2548     brow  = bs*i;
2549     for (j=0; j<ncols; j++){
2550       for (kcol=0; kcol<bs; kcol++){
2551         for (krow=0; krow<bs; krow++){
2552           atmp = PetscAbsScalar(*aa);aa++;
2553           row = brow + krow;    /* row index */
2554           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2555           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2556         }
2557       }
2558       aj++;
2559     }
2560   }
2561   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatCopy_SeqBAIJ"
2567 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2568 {
2569   PetscErrorCode ierr;
2570 
2571   PetscFunctionBegin;
2572   /* If the two matrices have the same copy implementation, use fast copy. */
2573   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2574     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2575     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2576     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2577 
2578     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2579     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2580     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2581   } else {
2582     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2583   }
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2589 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2590 {
2591   PetscErrorCode ierr;
2592 
2593   PetscFunctionBegin;
2594   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 #undef __FUNCT__
2599 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2600 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2601 {
2602   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2603   PetscFunctionBegin;
2604   *array = a->a;
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2610 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2611 {
2612   PetscFunctionBegin;
2613   PetscFunctionReturn(0);
2614 }
2615 
2616 #undef __FUNCT__
2617 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2618 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2619 {
2620   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2621   PetscErrorCode ierr;
2622   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2623   PetscBLASInt   one=1;
2624 
2625   PetscFunctionBegin;
2626   if (str == SAME_NONZERO_PATTERN) {
2627     PetscScalar alpha = a;
2628     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2629     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2630   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2631     if (y->xtoy && y->XtoY != X) {
2632       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2633       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2634     }
2635     if (!y->xtoy) { /* get xtoy */
2636       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2637       y->XtoY = X;
2638       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2639     }
2640     for (i=0; i<x->nz; i++) {
2641       j = 0;
2642       while (j < bs2){
2643         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2644         j++;
2645       }
2646     }
2647     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2648   } else {
2649     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2650   }
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 #undef __FUNCT__
2655 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2656 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2657 {
2658   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2659   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2660   MatScalar      *aa = a->a;
2661 
2662   PetscFunctionBegin;
2663   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2669 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2670 {
2671   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2672   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2673   MatScalar      *aa = a->a;
2674 
2675   PetscFunctionBegin;
2676   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2681 
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2684 /*
2685     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2686 */
2687 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2688 {
2689   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2690   PetscErrorCode ierr;
2691   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2692   PetscInt       nz = a->i[m],row,*jj,mr,col;
2693 
2694   PetscFunctionBegin;
2695   *nn = n;
2696   if (!ia) PetscFunctionReturn(0);
2697   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2698   else {
2699     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2700     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2701     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2702     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2703     jj = a->j;
2704     for (i=0; i<nz; i++) {
2705       collengths[jj[i]]++;
2706     }
2707     cia[0] = oshift;
2708     for (i=0; i<n; i++) {
2709       cia[i+1] = cia[i] + collengths[i];
2710     }
2711     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2712     jj   = a->j;
2713     for (row=0; row<m; row++) {
2714       mr = a->i[row+1] - a->i[row];
2715       for (i=0; i<mr; i++) {
2716         col = *jj++;
2717         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2718       }
2719     }
2720     ierr = PetscFree(collengths);CHKERRQ(ierr);
2721     *ia = cia; *ja = cja;
2722   }
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2728 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2729 {
2730   PetscErrorCode ierr;
2731 
2732   PetscFunctionBegin;
2733   if (!ia) PetscFunctionReturn(0);
2734   ierr = PetscFree(*ia);CHKERRQ(ierr);
2735   ierr = PetscFree(*ja);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2741 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2742 {
2743   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2744   PetscErrorCode ierr;
2745   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2746   PetscScalar    dx,*y,*xx,*w3_array;
2747   PetscScalar    *vscale_array;
2748   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2749   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2750   void           *fctx = coloring->fctx;
2751   PetscBool      flg = PETSC_FALSE;
2752   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2753   Vec            x1_tmp;
2754 
2755   PetscFunctionBegin;
2756   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2757   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2758   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2759   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2760 
2761   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2762   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2763   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2764   if (flg) {
2765     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2766   } else {
2767     PetscBool  assembled;
2768     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2769     if (assembled) {
2770       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2771     }
2772   }
2773 
2774   x1_tmp = x1;
2775   if (!coloring->vscale){
2776     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2777   }
2778 
2779   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2780     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2781   }
2782   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2783 
2784   /* Set w1 = F(x1) */
2785   if (!coloring->fset) {
2786     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2787     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2788     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2789   } else {
2790     coloring->fset = PETSC_FALSE;
2791   }
2792 
2793   if (!coloring->w3) {
2794     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2795     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2796   }
2797   w3 = coloring->w3;
2798 
2799     CHKMEMQ;
2800     /* Compute all the local scale factors, including ghost points */
2801   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2802   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2803   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2804   if (ctype == IS_COLORING_GHOSTED){
2805     col_start = 0; col_end = N;
2806   } else if (ctype == IS_COLORING_GLOBAL){
2807     xx = xx - start;
2808     vscale_array = vscale_array - start;
2809     col_start = start; col_end = N + start;
2810   }    CHKMEMQ;
2811   for (col=col_start; col<col_end; col++){
2812     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2813     if (coloring->htype[0] == 'w') {
2814       dx = 1.0 + unorm;
2815     } else {
2816       dx  = xx[col];
2817     }
2818     if (dx == (PetscScalar)0.0) dx = 1.0;
2819 #if !defined(PETSC_USE_COMPLEX)
2820     if (dx < umin && dx >= 0.0)      dx = umin;
2821     else if (dx < 0.0 && dx > -umin) dx = -umin;
2822 #else
2823     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2824     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2825 #endif
2826     dx               *= epsilon;
2827     vscale_array[col] = (PetscScalar)1.0/dx;
2828   }     CHKMEMQ;
2829   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2830   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2831   if (ctype == IS_COLORING_GLOBAL){
2832     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2833     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2834   }
2835   CHKMEMQ;
2836   if (coloring->vscaleforrow) {
2837     vscaleforrow = coloring->vscaleforrow;
2838   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2839 
2840   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2841   /*
2842     Loop over each color
2843   */
2844   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2845   for (k=0; k<coloring->ncolors; k++) {
2846     coloring->currentcolor = k;
2847     for (i=0; i<bs; i++) {
2848       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2849       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2850       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2851       /*
2852 	Loop over each column associated with color
2853 	adding the perturbation to the vector w3.
2854       */
2855       for (l=0; l<coloring->ncolumns[k]; l++) {
2856 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2857 	if (coloring->htype[0] == 'w') {
2858 	  dx = 1.0 + unorm;
2859 	} else {
2860 	  dx  = xx[col];
2861 	}
2862 	if (dx == (PetscScalar)0.0) dx = 1.0;
2863 #if !defined(PETSC_USE_COMPLEX)
2864 	if (dx < umin && dx >= 0.0)      dx = umin;
2865 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2866 #else
2867 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2868 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2869 #endif
2870 	dx            *= epsilon;
2871 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2872 	w3_array[col] += dx;
2873       }
2874       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2875       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2876 
2877       /*
2878 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2879 	w2 = F(x1 + dx) - F(x1)
2880       */
2881       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2882       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2883       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2884       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2885 
2886       /*
2887 	Loop over rows of vector, putting results into Jacobian matrix
2888       */
2889       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2890       for (l=0; l<coloring->nrows[k]; l++) {
2891 	row    = bs*coloring->rows[k][l];             /* local row index */
2892 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2893         for (j=0; j<bs; j++) {
2894   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2895           srows[j]  = row + start + j;
2896         }
2897 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2898       }
2899       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2900     }
2901   } /* endof for each color */
2902   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2903   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2904   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2905   ierr = PetscFree(srows);CHKERRQ(ierr);
2906 
2907   coloring->currentcolor = -1;
2908   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2909   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2910   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 /* -------------------------------------------------------------------*/
2915 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2916        MatGetRow_SeqBAIJ,
2917        MatRestoreRow_SeqBAIJ,
2918        MatMult_SeqBAIJ_N,
2919 /* 4*/ MatMultAdd_SeqBAIJ_N,
2920        MatMultTranspose_SeqBAIJ,
2921        MatMultTransposeAdd_SeqBAIJ,
2922        0,
2923        0,
2924        0,
2925 /*10*/ 0,
2926        MatLUFactor_SeqBAIJ,
2927        0,
2928        0,
2929        MatTranspose_SeqBAIJ,
2930 /*15*/ MatGetInfo_SeqBAIJ,
2931        MatEqual_SeqBAIJ,
2932        MatGetDiagonal_SeqBAIJ,
2933        MatDiagonalScale_SeqBAIJ,
2934        MatNorm_SeqBAIJ,
2935 /*20*/ 0,
2936        MatAssemblyEnd_SeqBAIJ,
2937        MatSetOption_SeqBAIJ,
2938        MatZeroEntries_SeqBAIJ,
2939 /*24*/ MatZeroRows_SeqBAIJ,
2940        0,
2941        0,
2942        0,
2943        0,
2944 /*29*/ MatSetUp_SeqBAIJ,
2945        0,
2946        0,
2947        0,
2948        0,
2949 /*34*/ MatDuplicate_SeqBAIJ,
2950        0,
2951        0,
2952        MatILUFactor_SeqBAIJ,
2953        0,
2954 /*39*/ MatAXPY_SeqBAIJ,
2955        MatGetSubMatrices_SeqBAIJ,
2956        MatIncreaseOverlap_SeqBAIJ,
2957        MatGetValues_SeqBAIJ,
2958        MatCopy_SeqBAIJ,
2959 /*44*/ 0,
2960        MatScale_SeqBAIJ,
2961        0,
2962        0,
2963        MatZeroRowsColumns_SeqBAIJ,
2964 /*49*/ 0,
2965        MatGetRowIJ_SeqBAIJ,
2966        MatRestoreRowIJ_SeqBAIJ,
2967        MatGetColumnIJ_SeqBAIJ,
2968        MatRestoreColumnIJ_SeqBAIJ,
2969 /*54*/ MatFDColoringCreate_SeqAIJ,
2970        0,
2971        0,
2972        0,
2973        MatSetValuesBlocked_SeqBAIJ,
2974 /*59*/ MatGetSubMatrix_SeqBAIJ,
2975        MatDestroy_SeqBAIJ,
2976        MatView_SeqBAIJ,
2977        0,
2978        0,
2979 /*64*/ 0,
2980        0,
2981        0,
2982        0,
2983        0,
2984 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2985        0,
2986        MatConvert_Basic,
2987        0,
2988        0,
2989 /*74*/ 0,
2990        MatFDColoringApply_BAIJ,
2991        0,
2992        0,
2993        0,
2994 /*79*/ 0,
2995        0,
2996        0,
2997        0,
2998        MatLoad_SeqBAIJ,
2999 /*84*/ 0,
3000        0,
3001        0,
3002        0,
3003        0,
3004 /*89*/ 0,
3005        0,
3006        0,
3007        0,
3008        0,
3009 /*94*/ 0,
3010        0,
3011        0,
3012        0,
3013        0,
3014 /*99*/0,
3015        0,
3016        0,
3017        0,
3018        0,
3019 /*104*/0,
3020        MatRealPart_SeqBAIJ,
3021        MatImaginaryPart_SeqBAIJ,
3022        0,
3023        0,
3024 /*109*/0,
3025        0,
3026        0,
3027        0,
3028        MatMissingDiagonal_SeqBAIJ,
3029 /*114*/0,
3030        0,
3031        0,
3032        0,
3033        0,
3034 /*119*/0,
3035        0,
3036        MatMultHermitianTranspose_SeqBAIJ,
3037        MatMultHermitianTransposeAdd_SeqBAIJ,
3038        0,
3039 /*124*/0,
3040        0,
3041        MatInvertBlockDiagonal_SeqBAIJ
3042 };
3043 
3044 EXTERN_C_BEGIN
3045 #undef __FUNCT__
3046 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3047 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3048 {
3049   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3050   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3051   PetscErrorCode ierr;
3052 
3053   PetscFunctionBegin;
3054   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3055 
3056   /* allocate space for values if not already there */
3057   if (!aij->saved_values) {
3058     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3059     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3060   }
3061 
3062   /* copy values over */
3063   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3064   PetscFunctionReturn(0);
3065 }
3066 EXTERN_C_END
3067 
3068 EXTERN_C_BEGIN
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3071 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3072 {
3073   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3074   PetscErrorCode ierr;
3075   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3076 
3077   PetscFunctionBegin;
3078   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3079   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3080 
3081   /* copy values over */
3082   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 EXTERN_C_END
3086 
3087 EXTERN_C_BEGIN
3088 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3089 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3090 EXTERN_C_END
3091 
3092 EXTERN_C_BEGIN
3093 #undef __FUNCT__
3094 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3095 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3096 {
3097   Mat_SeqBAIJ    *b;
3098   PetscErrorCode ierr;
3099   PetscInt       i,mbs,nbs,bs2;
3100   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3101 
3102   PetscFunctionBegin;
3103   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3104   if (nz == MAT_SKIP_ALLOCATION) {
3105     skipallocation = PETSC_TRUE;
3106     nz             = 0;
3107   }
3108 
3109   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3110   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3111   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3112   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3113   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3114 
3115   B->preallocated = PETSC_TRUE;
3116 
3117   mbs  = B->rmap->n/bs;
3118   nbs  = B->cmap->n/bs;
3119   bs2  = bs*bs;
3120 
3121   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3122 
3123   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3124   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3125   if (nnz) {
3126     for (i=0; i<mbs; i++) {
3127       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3128       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3129     }
3130   }
3131 
3132   b       = (Mat_SeqBAIJ*)B->data;
3133   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3134     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3135   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3136 
3137   if (!flg) {
3138     switch (bs) {
3139     case 1:
3140       B->ops->mult            = MatMult_SeqBAIJ_1;
3141       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3142       B->ops->sor             = MatSOR_SeqBAIJ_1;
3143       break;
3144     case 2:
3145       B->ops->mult            = MatMult_SeqBAIJ_2;
3146       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3147       B->ops->sor             = MatSOR_SeqBAIJ_2;
3148       break;
3149     case 3:
3150       B->ops->mult            = MatMult_SeqBAIJ_3;
3151       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3152       B->ops->sor             = MatSOR_SeqBAIJ_3;
3153       break;
3154     case 4:
3155       B->ops->mult            = MatMult_SeqBAIJ_4;
3156       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3157       B->ops->sor             = MatSOR_SeqBAIJ_4;
3158       break;
3159     case 5:
3160       B->ops->mult            = MatMult_SeqBAIJ_5;
3161       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3162       B->ops->sor             = MatSOR_SeqBAIJ_5;
3163       break;
3164     case 6:
3165       B->ops->mult            = MatMult_SeqBAIJ_6;
3166       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3167       B->ops->sor             = MatSOR_SeqBAIJ_6;
3168       break;
3169     case 7:
3170       B->ops->mult            = MatMult_SeqBAIJ_7;
3171       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3172       B->ops->sor             = MatSOR_SeqBAIJ_7;
3173       break;
3174     case 15:
3175       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3176       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3177       B->ops->sor             = MatSOR_SeqBAIJ_N;
3178       break;
3179     default:
3180       B->ops->mult            = MatMult_SeqBAIJ_N;
3181       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3182       B->ops->sor             = MatSOR_SeqBAIJ_N;
3183       break;
3184     }
3185   }
3186   b->mbs       = mbs;
3187   b->nbs       = nbs;
3188   if (!skipallocation) {
3189     if (!b->imax) {
3190       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3191       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3192       b->free_imax_ilen = PETSC_TRUE;
3193     }
3194     /* b->ilen will count nonzeros in each block row so far. */
3195     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3196     if (!nnz) {
3197       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3198       else if (nz < 0) nz = 1;
3199       for (i=0; i<mbs; i++) b->imax[i] = nz;
3200       nz = nz*mbs;
3201     } else {
3202       nz = 0;
3203       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3204     }
3205 
3206     /* allocate the matrix space */
3207     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3208     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3209     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3210     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3211     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3212     b->singlemalloc = PETSC_TRUE;
3213     b->i[0] = 0;
3214     for (i=1; i<mbs+1; i++) {
3215       b->i[i] = b->i[i-1] + b->imax[i-1];
3216     }
3217     b->free_a     = PETSC_TRUE;
3218     b->free_ij    = PETSC_TRUE;
3219   } else {
3220     b->free_a     = PETSC_FALSE;
3221     b->free_ij    = PETSC_FALSE;
3222   }
3223 
3224   b->bs2              = bs2;
3225   b->mbs              = mbs;
3226   b->nz               = 0;
3227   b->maxnz            = nz;
3228   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3229   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3230   PetscFunctionReturn(0);
3231 }
3232 EXTERN_C_END
3233 
3234 EXTERN_C_BEGIN
3235 #undef __FUNCT__
3236 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3237 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3238 {
3239   PetscInt       i,m,nz,nz_max=0,*nnz;
3240   PetscScalar    *values=0;
3241   PetscErrorCode ierr;
3242 
3243   PetscFunctionBegin;
3244   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3245   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3246   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3247   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3248   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3249   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3250   m = B->rmap->n/bs;
3251 
3252   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3253   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3254   for (i=0; i<m; i++) {
3255     nz = ii[i+1]- ii[i];
3256     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3257     nz_max = PetscMax(nz_max, nz);
3258     nnz[i] = nz;
3259   }
3260   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3261   ierr = PetscFree(nnz);CHKERRQ(ierr);
3262 
3263   values = (PetscScalar*)V;
3264   if (!values) {
3265     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3266     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3267   }
3268   for (i=0; i<m; i++) {
3269     PetscInt          ncols  = ii[i+1] - ii[i];
3270     const PetscInt    *icols = jj + ii[i];
3271     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3272     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3273   }
3274   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3275   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3276   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3277   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3278   PetscFunctionReturn(0);
3279 }
3280 EXTERN_C_END
3281 
3282 
3283 EXTERN_C_BEGIN
3284 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3285 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3286 #if defined(PETSC_HAVE_MUMPS)
3287 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3288 #endif
3289 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3290 EXTERN_C_END
3291 
3292 /*MC
3293    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3294    block sparse compressed row format.
3295 
3296    Options Database Keys:
3297 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3298 
3299   Level: beginner
3300 
3301 .seealso: MatCreateSeqBAIJ()
3302 M*/
3303 
3304 EXTERN_C_BEGIN
3305 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3306 EXTERN_C_END
3307 
3308 EXTERN_C_BEGIN
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatCreate_SeqBAIJ"
3311 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3312 {
3313   PetscErrorCode ierr;
3314   PetscMPIInt    size;
3315   Mat_SeqBAIJ    *b;
3316 
3317   PetscFunctionBegin;
3318   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3319   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3320 
3321   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3322   B->data = (void*)b;
3323   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3324   b->row                   = 0;
3325   b->col                   = 0;
3326   b->icol                  = 0;
3327   b->reallocs              = 0;
3328   b->saved_values          = 0;
3329 
3330   b->roworiented           = PETSC_TRUE;
3331   b->nonew                 = 0;
3332   b->diag                  = 0;
3333   b->solve_work            = 0;
3334   b->mult_work             = 0;
3335   B->spptr                 = 0;
3336   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3337   b->keepnonzeropattern    = PETSC_FALSE;
3338   b->xtoy                  = 0;
3339   b->XtoY                  = 0;
3340   B->same_nonzero          = PETSC_FALSE;
3341 
3342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3343                                      "MatGetFactorAvailable_seqbaij_petsc",
3344                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3346                                      "MatGetFactor_seqbaij_petsc",
3347                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3349                                      "MatGetFactor_seqbaij_bstrm",
3350                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3351 #if defined(PETSC_HAVE_MUMPS)
3352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3353 #endif
3354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3355                                      "MatInvertBlockDiagonal_SeqBAIJ",
3356                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3358                                      "MatStoreValues_SeqBAIJ",
3359                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3361                                      "MatRetrieveValues_SeqBAIJ",
3362                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3364                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3365                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3367                                      "MatConvert_SeqBAIJ_SeqAIJ",
3368                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3370                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3371                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3373                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3374                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3376                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3377                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3379                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3380                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3382                                      "MatIsTranspose_SeqBAIJ",
3383                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3384   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3385   PetscFunctionReturn(0);
3386 }
3387 EXTERN_C_END
3388 
3389 #undef __FUNCT__
3390 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3391 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3392 {
3393   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3394   PetscErrorCode ierr;
3395   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3396 
3397   PetscFunctionBegin;
3398   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3399 
3400   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3401     c->imax = a->imax;
3402     c->ilen = a->ilen;
3403     c->free_imax_ilen = PETSC_FALSE;
3404   } else {
3405     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3406     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3407     for (i=0; i<mbs; i++) {
3408       c->imax[i] = a->imax[i];
3409       c->ilen[i] = a->ilen[i];
3410     }
3411     c->free_imax_ilen = PETSC_TRUE;
3412   }
3413 
3414   /* allocate the matrix space */
3415   if (mallocmatspace){
3416     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3417       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3418       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3419       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3420       c->i            = a->i;
3421       c->j            = a->j;
3422       c->singlemalloc = PETSC_FALSE;
3423       c->free_a       = PETSC_TRUE;
3424       c->free_ij      = PETSC_FALSE;
3425       c->parent       = A;
3426       C->preallocated = PETSC_TRUE;
3427       C->assembled    = PETSC_TRUE;
3428       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3429       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3430       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3431     } else {
3432       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3433       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3434       c->singlemalloc = PETSC_TRUE;
3435       c->free_a       = PETSC_TRUE;
3436       c->free_ij      = PETSC_TRUE;
3437       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3438       if (mbs > 0) {
3439 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3440 	if (cpvalues == MAT_COPY_VALUES) {
3441 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3442 	} else {
3443 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3444 	}
3445       }
3446       C->preallocated = PETSC_TRUE;
3447       C->assembled    = PETSC_TRUE;
3448     }
3449   }
3450 
3451   c->roworiented = a->roworiented;
3452   c->nonew       = a->nonew;
3453   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3454   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3455   c->bs2         = a->bs2;
3456   c->mbs         = a->mbs;
3457   c->nbs         = a->nbs;
3458 
3459   if (a->diag) {
3460     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3461       c->diag      = a->diag;
3462       c->free_diag = PETSC_FALSE;
3463     } else {
3464       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3465       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3466       for (i=0; i<mbs; i++) {
3467         c->diag[i] = a->diag[i];
3468       }
3469       c->free_diag = PETSC_TRUE;
3470     }
3471   } else c->diag        = 0;
3472   c->nz                 = a->nz;
3473   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3474   c->solve_work         = 0;
3475   c->mult_work          = 0;
3476 
3477   c->compressedrow.use     = a->compressedrow.use;
3478   c->compressedrow.nrows   = a->compressedrow.nrows;
3479   c->compressedrow.check   = a->compressedrow.check;
3480   if (a->compressedrow.use){
3481     i = a->compressedrow.nrows;
3482     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3483     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3484     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3485     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3486   } else {
3487     c->compressedrow.use    = PETSC_FALSE;
3488     c->compressedrow.i      = PETSC_NULL;
3489     c->compressedrow.rindex = PETSC_NULL;
3490   }
3491   C->same_nonzero = A->same_nonzero;
3492   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3493   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3494   PetscFunctionReturn(0);
3495 }
3496 
3497 #undef __FUNCT__
3498 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3499 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3500 {
3501     PetscErrorCode ierr;
3502 
3503   PetscFunctionBegin;
3504   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3505   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3506   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3507   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 #undef __FUNCT__
3512 #define __FUNCT__ "MatLoad_SeqBAIJ"
3513 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3514 {
3515   Mat_SeqBAIJ    *a;
3516   PetscErrorCode ierr;
3517   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3518   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3519   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3520   PetscInt       *masked,nmask,tmp,bs2,ishift;
3521   PetscMPIInt    size;
3522   int            fd;
3523   PetscScalar    *aa;
3524   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3525 
3526   PetscFunctionBegin;
3527   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3528   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3529   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3530   bs2  = bs*bs;
3531 
3532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3533   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3534   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3535   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3536   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3537   M = header[1]; N = header[2]; nz = header[3];
3538 
3539   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3540   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3541 
3542   /*
3543      This code adds extra rows to make sure the number of rows is
3544     divisible by the blocksize
3545   */
3546   mbs        = M/bs;
3547   extra_rows = bs - M + bs*(mbs);
3548   if (extra_rows == bs) extra_rows = 0;
3549   else                  mbs++;
3550   if (extra_rows) {
3551     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3552   }
3553 
3554   /* Set global sizes if not already set */
3555   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3556     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3557   } else { /* Check if the matrix global sizes are correct */
3558     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3559     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3560       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3561     }
3562     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3563   }
3564 
3565   /* read in row lengths */
3566   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3567   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3568   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3569 
3570   /* read in column indices */
3571   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3572   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3573   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3574 
3575   /* loop over row lengths determining block row lengths */
3576   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3577   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3578   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3579   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3580   rowcount = 0;
3581   nzcount = 0;
3582   for (i=0; i<mbs; i++) {
3583     nmask = 0;
3584     for (j=0; j<bs; j++) {
3585       kmax = rowlengths[rowcount];
3586       for (k=0; k<kmax; k++) {
3587         tmp = jj[nzcount++]/bs;
3588         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3589       }
3590       rowcount++;
3591     }
3592     browlengths[i] += nmask;
3593     /* zero out the mask elements we set */
3594     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3595   }
3596 
3597   /* Do preallocation  */
3598   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3599   a = (Mat_SeqBAIJ*)newmat->data;
3600 
3601   /* set matrix "i" values */
3602   a->i[0] = 0;
3603   for (i=1; i<= mbs; i++) {
3604     a->i[i]      = a->i[i-1] + browlengths[i-1];
3605     a->ilen[i-1] = browlengths[i-1];
3606   }
3607   a->nz         = 0;
3608   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3609 
3610   /* read in nonzero values */
3611   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3612   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3613   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3614 
3615   /* set "a" and "j" values into matrix */
3616   nzcount = 0; jcount = 0;
3617   for (i=0; i<mbs; i++) {
3618     nzcountb = nzcount;
3619     nmask    = 0;
3620     for (j=0; j<bs; j++) {
3621       kmax = rowlengths[i*bs+j];
3622       for (k=0; k<kmax; k++) {
3623         tmp = jj[nzcount++]/bs;
3624 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3625       }
3626     }
3627     /* sort the masked values */
3628     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3629 
3630     /* set "j" values into matrix */
3631     maskcount = 1;
3632     for (j=0; j<nmask; j++) {
3633       a->j[jcount++]  = masked[j];
3634       mask[masked[j]] = maskcount++;
3635     }
3636     /* set "a" values into matrix */
3637     ishift = bs2*a->i[i];
3638     for (j=0; j<bs; j++) {
3639       kmax = rowlengths[i*bs+j];
3640       for (k=0; k<kmax; k++) {
3641         tmp       = jj[nzcountb]/bs ;
3642         block     = mask[tmp] - 1;
3643         point     = jj[nzcountb] - bs*tmp;
3644         idx       = ishift + bs2*block + j + bs*point;
3645         a->a[idx] = (MatScalar)aa[nzcountb++];
3646       }
3647     }
3648     /* zero out the mask elements we set */
3649     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3650   }
3651   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3652 
3653   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3654   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3655   ierr = PetscFree(aa);CHKERRQ(ierr);
3656   ierr = PetscFree(jj);CHKERRQ(ierr);
3657   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3658 
3659   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3660   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3661   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3662   PetscFunctionReturn(0);
3663 }
3664 
3665 #undef __FUNCT__
3666 #define __FUNCT__ "MatCreateSeqBAIJ"
3667 /*@C
3668    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3669    compressed row) format.  For good matrix assembly performance the
3670    user should preallocate the matrix storage by setting the parameter nz
3671    (or the array nnz).  By setting these parameters accurately, performance
3672    during matrix assembly can be increased by more than a factor of 50.
3673 
3674    Collective on MPI_Comm
3675 
3676    Input Parameters:
3677 +  comm - MPI communicator, set to PETSC_COMM_SELF
3678 .  bs - size of block
3679 .  m - number of rows
3680 .  n - number of columns
3681 .  nz - number of nonzero blocks  per block row (same for all rows)
3682 -  nnz - array containing the number of nonzero blocks in the various block rows
3683          (possibly different for each block row) or PETSC_NULL
3684 
3685    Output Parameter:
3686 .  A - the matrix
3687 
3688    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3689    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3690    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3691 
3692    Options Database Keys:
3693 .   -mat_no_unroll - uses code that does not unroll the loops in the
3694                      block calculations (much slower)
3695 .    -mat_block_size - size of the blocks to use
3696 
3697    Level: intermediate
3698 
3699    Notes:
3700    The number of rows and columns must be divisible by blocksize.
3701 
3702    If the nnz parameter is given then the nz parameter is ignored
3703 
3704    A nonzero block is any block that as 1 or more nonzeros in it
3705 
3706    The block AIJ format is fully compatible with standard Fortran 77
3707    storage.  That is, the stored row and column indices can begin at
3708    either one (as in Fortran) or zero.  See the users' manual for details.
3709 
3710    Specify the preallocated storage with either nz or nnz (not both).
3711    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3712    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3713    matrices.
3714 
3715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3716 @*/
3717 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3718 {
3719   PetscErrorCode ierr;
3720 
3721   PetscFunctionBegin;
3722   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3723   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3724   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3725   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3726   PetscFunctionReturn(0);
3727 }
3728 
3729 #undef __FUNCT__
3730 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3731 /*@C
3732    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3733    per row in the matrix. For good matrix assembly performance the
3734    user should preallocate the matrix storage by setting the parameter nz
3735    (or the array nnz).  By setting these parameters accurately, performance
3736    during matrix assembly can be increased by more than a factor of 50.
3737 
3738    Collective on MPI_Comm
3739 
3740    Input Parameters:
3741 +  A - the matrix
3742 .  bs - size of block
3743 .  nz - number of block nonzeros per block row (same for all rows)
3744 -  nnz - array containing the number of block nonzeros in the various block rows
3745          (possibly different for each block row) or PETSC_NULL
3746 
3747    Options Database Keys:
3748 .   -mat_no_unroll - uses code that does not unroll the loops in the
3749                      block calculations (much slower)
3750 .    -mat_block_size - size of the blocks to use
3751 
3752    Level: intermediate
3753 
3754    Notes:
3755    If the nnz parameter is given then the nz parameter is ignored
3756 
3757    You can call MatGetInfo() to get information on how effective the preallocation was;
3758    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3759    You can also run with the option -info and look for messages with the string
3760    malloc in them to see if additional memory allocation was needed.
3761 
3762    The block AIJ format is fully compatible with standard Fortran 77
3763    storage.  That is, the stored row and column indices can begin at
3764    either one (as in Fortran) or zero.  See the users' manual for details.
3765 
3766    Specify the preallocated storage with either nz or nnz (not both).
3767    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3768    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3769 
3770 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3771 @*/
3772 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3773 {
3774   PetscErrorCode ierr;
3775 
3776   PetscFunctionBegin;
3777   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3778   PetscValidType(B,1);
3779   PetscValidLogicalCollectiveInt(B,bs,2);
3780   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 #undef __FUNCT__
3785 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3786 /*@C
3787    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3788    (the default sequential PETSc format).
3789 
3790    Collective on MPI_Comm
3791 
3792    Input Parameters:
3793 +  A - the matrix
3794 .  i - the indices into j for the start of each local row (starts with zero)
3795 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3796 -  v - optional values in the matrix
3797 
3798    Level: developer
3799 
3800 .keywords: matrix, aij, compressed row, sparse
3801 
3802 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3803 @*/
3804 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3805 {
3806   PetscErrorCode ierr;
3807 
3808   PetscFunctionBegin;
3809   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3810   PetscValidType(B,1);
3811   PetscValidLogicalCollectiveInt(B,bs,2);
3812   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3813   PetscFunctionReturn(0);
3814 }
3815 
3816 
3817 #undef __FUNCT__
3818 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3819 /*@
3820      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3821 
3822      Collective on MPI_Comm
3823 
3824    Input Parameters:
3825 +  comm - must be an MPI communicator of size 1
3826 .  bs - size of block
3827 .  m - number of rows
3828 .  n - number of columns
3829 .  i - row indices
3830 .  j - column indices
3831 -  a - matrix values
3832 
3833    Output Parameter:
3834 .  mat - the matrix
3835 
3836    Level: advanced
3837 
3838    Notes:
3839        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3840     once the matrix is destroyed
3841 
3842        You cannot set new nonzero locations into this matrix, that will generate an error.
3843 
3844        The i and j indices are 0 based
3845 
3846        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3847 
3848 
3849 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3850 
3851 @*/
3852 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3853 {
3854   PetscErrorCode ierr;
3855   PetscInt       ii;
3856   Mat_SeqBAIJ    *baij;
3857 
3858   PetscFunctionBegin;
3859   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3860   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3861 
3862   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3863   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3864   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3865   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3866   baij = (Mat_SeqBAIJ*)(*mat)->data;
3867   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3868   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3869 
3870   baij->i = i;
3871   baij->j = j;
3872   baij->a = a;
3873   baij->singlemalloc = PETSC_FALSE;
3874   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3875   baij->free_a       = PETSC_FALSE;
3876   baij->free_ij      = PETSC_FALSE;
3877 
3878   for (ii=0; ii<m; ii++) {
3879     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3880 #if defined(PETSC_USE_DEBUG)
3881     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3882 #endif
3883   }
3884 #if defined(PETSC_USE_DEBUG)
3885   for (ii=0; ii<baij->i[m]; ii++) {
3886     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3887     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3888   }
3889 #endif
3890 
3891   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   PetscFunctionReturn(0);
3894 }
3895