xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7d0a6c19129e7069c8a40e210b34ed62989173db)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
7 #include "petscblaslapack.h"
8 #include "../src/mat/blockinvert.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
28   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
31   PetscFunctionReturn(0);
32 }
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
37 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
40   PetscErrorCode ierr;
41   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
42   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
43   PetscReal      shift = 0.0;
44 
45   PetscFunctionBegin;
46   if (a->idiagvalid) PetscFunctionReturn(0);
47   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
48   diag_offset = a->diag;
49   if (!a->idiag) {
50     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
51     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
52   }
53   diag  = a->idiag;
54   mdiag = a->idiag+bs2*mbs;
55   /* factor and invert each block */
56   switch (bs){
57     case 1:
58       for (i=0; i<mbs; i++) {
59         odiag = v + 1*diag_offset[i];
60         diag[0]  = odiag[0];
61         mdiag[0] = odiag[0];
62         diag[0]  = 1.0 / (diag[0] + shift);
63         diag    += 1;
64         mdiag   += 1;
65       }
66       break;
67     case 2:
68       for (i=0; i<mbs; i++) {
69         odiag   = v + 4*diag_offset[i];
70         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
71 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
72 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
73 	diag    += 4;
74 	mdiag   += 4;
75       }
76       break;
77     case 3:
78       for (i=0; i<mbs; i++) {
79         odiag    = v + 9*diag_offset[i];
80         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82         diag[8]  = odiag[8];
83         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85         mdiag[8] = odiag[8];
86 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
87 	diag    += 9;
88 	mdiag   += 9;
89       }
90       break;
91     case 4:
92       for (i=0; i<mbs; i++) {
93         odiag  = v + 16*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
97 	diag  += 16;
98 	mdiag += 16;
99       }
100       break;
101     case 5:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 25*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
107 	diag  += 25;
108 	mdiag += 25;
109       }
110       break;
111     case 6:
112       for (i=0; i<mbs; i++) {
113         odiag = v + 36*diag_offset[i];
114         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
117 	diag  += 36;
118 	mdiag += 36;
119       }
120       break;
121     case 7:
122       for (i=0; i<mbs; i++) {
123         odiag = v + 49*diag_offset[i];
124         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
127 	diag  += 49;
128 	mdiag += 49;
129       }
130       break;
131     default:
132       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
133       for (i=0; i<mbs; i++) {
134         odiag = v + bs2*diag_offset[i];
135         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
136         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
138 	diag  += bs2;
139 	mdiag += bs2;
140       }
141       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
142   }
143   a->idiagvalid = PETSC_TRUE;
144   PetscFunctionReturn(0);
145 }
146 EXTERN_C_END
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
151 {
152   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
153   PetscScalar        *x,x1,s1;
154   const PetscScalar  *b;
155   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
156   PetscErrorCode     ierr;
157   PetscInt           m = a->mbs,i,i2,nz,j;
158   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   its = its*lits;
163   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
164   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
165   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
166   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
167   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
168 
169   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
170 
171   diag  = a->diag;
172   idiag = a->idiag;
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
178       x[0] = b[0]*idiag[0];
179       i2     = 1;
180       idiag += 1;
181       for (i=1; i<m; i++) {
182         v     = aa + ai[i];
183         vi    = aj + ai[i];
184         nz    = diag[i] - ai[i];
185         s1    = b[i2];
186         for (j=0; j<nz; j++) {
187           s1 -= v[j]*x[vi[j]];
188         }
189         x[i2]   = idiag[0]*s1;
190         idiag   += 1;
191         i2      += 1;
192       }
193       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
194       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
195     }
196     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
197         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
198       i2    = 0;
199       mdiag = a->idiag+a->mbs;
200       for (i=0; i<m; i++) {
201         x1      = x[i2];
202         x[i2]   = mdiag[0]*x1;
203         mdiag  += 1;
204         i2     += 1;
205       }
206       ierr = PetscLogFlops(m);CHKERRQ(ierr);
207     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
208       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
209     }
210     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
211       idiag   = a->idiag+a->mbs - 1;
212       i2      = m - 1;
213       x1      = x[i2];
214       x[i2]   = idiag[0]*x1;
215       idiag -= 1;
216       i2    -= 1;
217       for (i=m-2; i>=0; i--) {
218         v     = aa + (diag[i]+1);
219         vi    = aj + diag[i] + 1;
220         nz    = ai[i+1] - diag[i] - 1;
221         s1    = x[i2];
222         for (j=0; j<nz; j++) {
223           s1 -= v[j]*x[vi[j]];
224         }
225         x[i2]   = idiag[0]*s1;
226         idiag   -= 1;
227         i2      -= 1;
228       }
229       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
230     }
231   } else {
232     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
233   }
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
242 {
243   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
244   PetscScalar        *x,x1,x2,s1,s2;
245   const PetscScalar  *b;
246   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
247   PetscErrorCode     ierr;
248   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
249   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
250 
251   PetscFunctionBegin;
252   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
253   its = its*lits;
254   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
255   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
256   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
257   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
258   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
259 
260   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
261 
262   diag  = a->diag;
263   idiag = a->idiag;
264   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
265   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
266 
267   if (flag & SOR_ZERO_INITIAL_GUESS) {
268     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
269       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
270       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
271       i2     = 2;
272       idiag += 4;
273       for (i=1; i<m; i++) {
274 	v     = aa + 4*ai[i];
275 	vi    = aj + ai[i];
276 	nz    = diag[i] - ai[i];
277 	s1    = b[i2]; s2 = b[i2+1];
278         for (j=0; j<nz; j++) {
279 	  idx  = 2*vi[j];
280           it   = 4*j;
281 	  x1   = x[idx]; x2 = x[1+idx];
282 	  s1  -= v[it]*x1 + v[it+2]*x2;
283 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
284 	}
285 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
286 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
287         idiag   += 4;
288         i2      += 2;
289       }
290       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
291       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
292     }
293     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
294         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
295       i2    = 0;
296       mdiag = a->idiag+4*a->mbs;
297       for (i=0; i<m; i++) {
298         x1      = x[i2]; x2 = x[i2+1];
299         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
300         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
301         mdiag  += 4;
302         i2     += 2;
303       }
304       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
305     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
306       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
307     }
308     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
309       idiag   = a->idiag+4*a->mbs - 4;
310       i2      = 2*m - 2;
311       x1      = x[i2]; x2 = x[i2+1];
312       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
313       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
314       idiag -= 4;
315       i2    -= 2;
316       for (i=m-2; i>=0; i--) {
317 	v     = aa + 4*(diag[i]+1);
318 	vi    = aj + diag[i] + 1;
319 	nz    = ai[i+1] - diag[i] - 1;
320 	s1    = x[i2]; s2 = x[i2+1];
321         for (j=0; j<nz; j++) {
322  	  idx  = 2*vi[j];
323           it   = 4*j;
324 	  x1   = x[idx]; x2 = x[1+idx];
325 	  s1  -= v[it]*x1 + v[it+2]*x2;
326 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
327 	}
328 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
329 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
330         idiag   -= 4;
331         i2      -= 2;
332       }
333       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
334     }
335   } else {
336     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
337   }
338   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
346 {
347   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
348   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
349   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
350   const PetscScalar  *b;
351   PetscErrorCode     ierr;
352   PetscInt           m = a->mbs,i,i2,nz,idx;
353   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
354 
355   PetscFunctionBegin;
356   its = its*lits;
357   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
358   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
359   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
360   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
361   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
362   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
363 
364   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
365 
366   diag  = a->diag;
367   idiag = a->idiag;
368   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
369   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
370 
371   if (flag & SOR_ZERO_INITIAL_GUESS) {
372     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
373       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
374       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
375       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
376       i2     = 3;
377       idiag += 9;
378       for (i=1; i<m; i++) {
379         v     = aa + 9*ai[i];
380         vi    = aj + ai[i];
381         nz    = diag[i] - ai[i];
382         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
383         while (nz--) {
384           idx  = 3*(*vi++);
385           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
386           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
387           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
388           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
389           v   += 9;
390         }
391         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
392         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
393         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
394         idiag   += 9;
395         i2      += 3;
396       }
397       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
398       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
399     }
400     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
401         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
402       i2    = 0;
403       mdiag = a->idiag+9*a->mbs;
404       for (i=0; i<m; i++) {
405         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
406         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
407         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
408         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
409         mdiag  += 9;
410         i2     += 3;
411       }
412       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
413     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
414       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
415     }
416     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
417       idiag   = a->idiag+9*a->mbs - 9;
418       i2      = 3*m - 3;
419       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
420       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
421       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
422       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
423       idiag -= 9;
424       i2    -= 3;
425       for (i=m-2; i>=0; i--) {
426         v     = aa + 9*(diag[i]+1);
427         vi    = aj + diag[i] + 1;
428         nz    = ai[i+1] - diag[i] - 1;
429         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
430         while (nz--) {
431           idx  = 3*(*vi++);
432           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
433           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
434           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
435           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
436           v   += 9;
437         }
438         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
439         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
440         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
441         idiag   -= 9;
442         i2      -= 3;
443       }
444       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
445     }
446   } else {
447     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
448   }
449   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
457 {
458   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
459   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
460   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
461   const PetscScalar  *b;
462   PetscErrorCode     ierr;
463   PetscInt           m = a->mbs,i,i2,nz,idx;
464   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
465 
466   PetscFunctionBegin;
467   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
468   its = its*lits;
469   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
470   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
471   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
472   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
473   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
474 
475   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
476 
477   diag  = a->diag;
478   idiag = a->idiag;
479   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
480   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
481 
482   if (flag & SOR_ZERO_INITIAL_GUESS) {
483     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
484       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
485       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
486       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
487       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
488       i2     = 4;
489       idiag += 16;
490       for (i=1; i<m; i++) {
491 	v     = aa + 16*ai[i];
492 	vi    = aj + ai[i];
493 	nz    = diag[i] - ai[i];
494 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
495 	while (nz--) {
496 	  idx  = 4*(*vi++);
497 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
498 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
499 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
500 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
501 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
502 	  v   += 16;
503 	}
504 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
505 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
506 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
507 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
508         idiag   += 16;
509         i2      += 4;
510       }
511       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
512       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
513     }
514     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
515         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
516       i2    = 0;
517       mdiag = a->idiag+16*a->mbs;
518       for (i=0; i<m; i++) {
519         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
520         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
521         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
522         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
523         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
524         mdiag  += 16;
525         i2     += 4;
526       }
527       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
528     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
529       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
530     }
531     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
532       idiag   = a->idiag+16*a->mbs - 16;
533       i2      = 4*m - 4;
534       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
535       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
536       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
537       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
538       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
539       idiag -= 16;
540       i2    -= 4;
541       for (i=m-2; i>=0; i--) {
542 	v     = aa + 16*(diag[i]+1);
543 	vi    = aj + diag[i] + 1;
544 	nz    = ai[i+1] - diag[i] - 1;
545 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
546 	while (nz--) {
547 	  idx  = 4*(*vi++);
548 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
549 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
550 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
551 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
552 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
553 	  v   += 16;
554 	}
555 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
556 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
557 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
558 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
559         idiag   -= 16;
560         i2      -= 4;
561       }
562       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
563     }
564   } else {
565     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
566   }
567   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
575 {
576   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
577   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
578   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
579   const PetscScalar  *b;
580   PetscErrorCode     ierr;
581   PetscInt           m = a->mbs,i,i2,nz,idx;
582   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
583 
584   PetscFunctionBegin;
585   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
586   its = its*lits;
587   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
588   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
589   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
590   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
591   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
592 
593   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
594 
595   diag  = a->diag;
596   idiag = a->idiag;
597   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
598   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
599 
600   if (flag & SOR_ZERO_INITIAL_GUESS) {
601     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
602       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
603       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
604       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
605       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
606       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
607       i2     = 5;
608       idiag += 25;
609       for (i=1; i<m; i++) {
610 	v     = aa + 25*ai[i];
611 	vi    = aj + ai[i];
612 	nz    = diag[i] - ai[i];
613 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
614 	while (nz--) {
615 	  idx  = 5*(*vi++);
616 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
617 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
618 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
619 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
620 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
621 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
622 	  v   += 25;
623 	}
624 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
625 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
626 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
627 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
628 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
629         idiag   += 25;
630         i2      += 5;
631       }
632       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
633       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
634     }
635     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
636         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
637       i2    = 0;
638       mdiag = a->idiag+25*a->mbs;
639       for (i=0; i<m; i++) {
640         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
641         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
642         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
643         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
644         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
645         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
646         mdiag  += 25;
647         i2     += 5;
648       }
649       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
650     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
651       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
652     }
653     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
654       idiag   = a->idiag+25*a->mbs - 25;
655       i2      = 5*m - 5;
656       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
657       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
658       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
659       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
660       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
661       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
662       idiag -= 25;
663       i2    -= 5;
664       for (i=m-2; i>=0; i--) {
665 	v     = aa + 25*(diag[i]+1);
666 	vi    = aj + diag[i] + 1;
667 	nz    = ai[i+1] - diag[i] - 1;
668 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
669 	while (nz--) {
670 	  idx  = 5*(*vi++);
671 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
672 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
673 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
674 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
675 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
676 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
677 	  v   += 25;
678 	}
679 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
680 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
681 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
682 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
683 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
684         idiag   -= 25;
685         i2      -= 5;
686       }
687       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
688     }
689   } else {
690     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
691   }
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
700 {
701   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
702   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
703   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
704   const PetscScalar  *b;
705   PetscErrorCode     ierr;
706   PetscInt           m = a->mbs,i,i2,nz,idx;
707   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
708 
709   PetscFunctionBegin;
710   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
711   its = its*lits;
712   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
713   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
714   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
715   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
716   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
717 
718   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
719 
720   diag  = a->diag;
721   idiag = a->idiag;
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
724 
725   if (flag & SOR_ZERO_INITIAL_GUESS) {
726     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
727       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
728       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
729       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
730       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
731       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
732       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
733       i2     = 6;
734       idiag += 36;
735       for (i=1; i<m; i++) {
736         v     = aa + 36*ai[i];
737         vi    = aj + ai[i];
738         nz    = diag[i] - ai[i];
739         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
740         while (nz--) {
741           idx  = 6*(*vi++);
742           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
743           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
744           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
745           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
746           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
747           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
748           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
749           v   += 36;
750         }
751         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
752         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
753         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
754         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
755         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
756         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
757         idiag   += 36;
758         i2      += 6;
759       }
760       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
761       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
762     }
763     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
764         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
765       i2    = 0;
766       mdiag = a->idiag+36*a->mbs;
767       for (i=0; i<m; i++) {
768         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
769         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
770         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
771         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
772         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
773         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
774         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
775         mdiag  += 36;
776         i2     += 6;
777       }
778       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
779     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
780       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
781     }
782     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
783       idiag   = a->idiag+36*a->mbs - 36;
784       i2      = 6*m - 6;
785       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
786       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
787       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
788       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
789       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
790       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
791       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
792       idiag -= 36;
793       i2    -= 6;
794       for (i=m-2; i>=0; i--) {
795         v     = aa + 36*(diag[i]+1);
796         vi    = aj + diag[i] + 1;
797         nz    = ai[i+1] - diag[i] - 1;
798         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
799         while (nz--) {
800           idx  = 6*(*vi++);
801           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
802           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
803           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
804           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
805           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
806           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
807           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
808           v   += 36;
809         }
810         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
811         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
812         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
813         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
814         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
815         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
816         idiag   -= 36;
817         i2      -= 6;
818       }
819       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
820     }
821   } else {
822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
823   }
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
832 {
833   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
834   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
835   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
836   const PetscScalar  *b;
837   PetscErrorCode     ierr;
838   PetscInt           m = a->mbs,i,i2,nz,idx;
839   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
840 
841   PetscFunctionBegin;
842   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
843   its = its*lits;
844   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
845   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
846   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
847   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
848   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
849 
850   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
851 
852   diag  = a->diag;
853   idiag = a->idiag;
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
856 
857   if (flag & SOR_ZERO_INITIAL_GUESS) {
858     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
859       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
860       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
861       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
862       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
863       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
864       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
865       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
866       i2     = 7;
867       idiag += 49;
868       for (i=1; i<m; i++) {
869         v     = aa + 49*ai[i];
870         vi    = aj + ai[i];
871         nz    = diag[i] - ai[i];
872         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
873         while (nz--) {
874           idx  = 7*(*vi++);
875           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
876           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
877           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
878           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
879           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
880           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
881           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
882           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
883           v   += 49;
884         }
885         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
886         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
887         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
888         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
889         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
890         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
891         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
892         idiag   += 49;
893         i2      += 7;
894       }
895       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
896       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
897     }
898     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
899         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
900       i2    = 0;
901       mdiag = a->idiag+49*a->mbs;
902       for (i=0; i<m; i++) {
903         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
904         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
905         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
906         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
907         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
908         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
909         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
910         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
911         mdiag  += 49;
912         i2     += 7;
913       }
914       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
915     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
916       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
917     }
918     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
919       idiag   = a->idiag+49*a->mbs - 49;
920       i2      = 7*m - 7;
921       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
922       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
923       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
924       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
925       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
926       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
927       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
928       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
929       idiag -= 49;
930       i2    -= 7;
931       for (i=m-2; i>=0; i--) {
932         v     = aa + 49*(diag[i]+1);
933         vi    = aj + diag[i] + 1;
934         nz    = ai[i+1] - diag[i] - 1;
935         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
936         while (nz--) {
937           idx  = 7*(*vi++);
938           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
939           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
940           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
941           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
942           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
943           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
944           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
945           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
946           v   += 49;
947         }
948         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
949         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
950         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
951         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
952         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
953         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
954         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
955         idiag   -= 49;
956         i2      -= 7;
957       }
958       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
959     }
960   } else {
961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
962   }
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
971 {
972   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
973   PetscScalar        *x,*work,*w,*workt;
974   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
975   const PetscScalar  *b;
976   PetscErrorCode     ierr;
977   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
978   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
979 
980   PetscFunctionBegin;
981   its = its*lits;
982   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
983   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
984   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
985   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
986   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
987   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
988 
989   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
990 
991   diag  = a->diag;
992   idiag = a->idiag;
993   if (!a->mult_work) {
994     k    = PetscMax(A->rmap->n,A->cmap->n);
995     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
996   }
997   work = a->mult_work;
998   if (!a->sor_work) {
999     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1000   }
1001   w = a->sor_work;
1002 
1003   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1004   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1005 
1006   if (flag & SOR_ZERO_INITIAL_GUESS) {
1007     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1008       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1009       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1010       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1011       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1012       i2     = bs;
1013       idiag += bs2;
1014       for (i=1; i<m; i++) {
1015         v     = aa + bs2*ai[i];
1016         vi    = aj + ai[i];
1017         nz    = diag[i] - ai[i];
1018 
1019         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1020 	/* copy all rows of x that are needed into contiguous space */
1021         workt = work;
1022         for (j=0; j<nz; j++) {
1023           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1024           workt += bs;
1025         }
1026         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1027        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1028         while (nz--) {
1029           idx  = N*(*vi++);
1030           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1031           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1032           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1033           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1034           v   += N2;
1035 	  } */
1036 
1037         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1038         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1039         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1040         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1041 
1042         idiag   += bs2;
1043         i2      += bs;
1044       }
1045       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1046       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1047     }
1048     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1049         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1050       i2    = 0;
1051       mdiag = a->idiag+bs2*a->mbs;
1052       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053       for (i=0; i<m; i++) {
1054         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1055         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1056         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1057         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1058         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1059 
1060         mdiag  += bs2;
1061         i2     += bs;
1062       }
1063       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1064     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1065       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1066     }
1067     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1068       idiag   = a->idiag+bs2*a->mbs - bs2;
1069       i2      = bs*m - bs;
1070       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1071       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1072       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1073       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1074       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1075       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1076       idiag -= bs2;
1077       i2    -= bs;
1078       for (i=m-2; i>=0; i--) {
1079         v     = aa + bs2*(diag[i]+1);
1080         vi    = aj + diag[i] + 1;
1081         nz    = ai[i+1] - diag[i] - 1;
1082 
1083         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1084 	/* copy all rows of x that are needed into contiguous space */
1085         workt = work;
1086         for (j=0; j<nz; j++) {
1087           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1088           workt += bs;
1089         }
1090         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1091         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1092         while (nz--) {
1093           idx  = N*(*vi++);
1094           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1095           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1096           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1097           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1098           v   += N2;
1099 	  } */
1100 
1101         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1102         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1103         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1104         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1105         idiag   -= bs2;
1106         i2      -= bs;
1107       }
1108       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1109     }
1110   } else {
1111     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1112   }
1113   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1114   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1120 */
1121 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1124 #define matsetvaluesblocked4_ matsetvaluesblocked4
1125 #endif
1126 
1127 EXTERN_C_BEGIN
1128 #undef __FUNCT__
1129 #define __FUNCT__ "matsetvaluesblocked4_"
1130 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1131 {
1132   Mat               A = *AA;
1133   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1134   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1135   PetscInt          *ai=a->i,*ailen=a->ilen;
1136   PetscInt          *aj=a->j,stepval,lastcol = -1;
1137   const PetscScalar *value = v;
1138   MatScalar         *ap,*aa = a->a,*bap;
1139 
1140   PetscFunctionBegin;
1141   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1142   stepval = (n-1)*4;
1143   for (k=0; k<m; k++) { /* loop over added rows */
1144     row  = im[k];
1145     rp   = aj + ai[row];
1146     ap   = aa + 16*ai[row];
1147     nrow = ailen[row];
1148     low  = 0;
1149     high = nrow;
1150     for (l=0; l<n; l++) { /* loop over added columns */
1151       col = in[l];
1152       if (col <= lastcol) low = 0; else high = nrow;
1153       lastcol = col;
1154       value = v + k*(stepval+4 + l)*4;
1155       while (high-low > 7) {
1156         t = (low+high)/2;
1157         if (rp[t] > col) high = t;
1158         else             low  = t;
1159       }
1160       for (i=low; i<high; i++) {
1161         if (rp[i] > col) break;
1162         if (rp[i] == col) {
1163           bap  = ap +  16*i;
1164           for (ii=0; ii<4; ii++,value+=stepval) {
1165             for (jj=ii; jj<16; jj+=4) {
1166               bap[jj] += *value++;
1167             }
1168           }
1169           goto noinsert2;
1170         }
1171       }
1172       N = nrow++ - 1;
1173       high++; /* added new column index thus must search to one higher than before */
1174       /* shift up all the later entries in this row */
1175       for (ii=N; ii>=i; ii--) {
1176         rp[ii+1] = rp[ii];
1177         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1178       }
1179       if (N >= i) {
1180         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1181       }
1182       rp[i] = col;
1183       bap   = ap +  16*i;
1184       for (ii=0; ii<4; ii++,value+=stepval) {
1185         for (jj=ii; jj<16; jj+=4) {
1186           bap[jj] = *value++;
1187         }
1188       }
1189       noinsert2:;
1190       low = i;
1191     }
1192     ailen[row] = nrow;
1193   }
1194   PetscFunctionReturnVoid();
1195 }
1196 EXTERN_C_END
1197 
1198 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1199 #define matsetvalues4_ MATSETVALUES4
1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1201 #define matsetvalues4_ matsetvalues4
1202 #endif
1203 
1204 EXTERN_C_BEGIN
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSetValues4_"
1207 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1208 {
1209   Mat         A = *AA;
1210   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1211   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1212   PetscInt    *ai=a->i,*ailen=a->ilen;
1213   PetscInt    *aj=a->j,brow,bcol;
1214   PetscInt    ridx,cidx,lastcol = -1;
1215   MatScalar   *ap,value,*aa=a->a,*bap;
1216 
1217   PetscFunctionBegin;
1218   for (k=0; k<m; k++) { /* loop over added rows */
1219     row  = im[k]; brow = row/4;
1220     rp   = aj + ai[brow];
1221     ap   = aa + 16*ai[brow];
1222     nrow = ailen[brow];
1223     low  = 0;
1224     high = nrow;
1225     for (l=0; l<n; l++) { /* loop over added columns */
1226       col = in[l]; bcol = col/4;
1227       ridx = row % 4; cidx = col % 4;
1228       value = v[l + k*n];
1229       if (col <= lastcol) low = 0; else high = nrow;
1230       lastcol = col;
1231       while (high-low > 7) {
1232         t = (low+high)/2;
1233         if (rp[t] > bcol) high = t;
1234         else              low  = t;
1235       }
1236       for (i=low; i<high; i++) {
1237         if (rp[i] > bcol) break;
1238         if (rp[i] == bcol) {
1239           bap  = ap +  16*i + 4*cidx + ridx;
1240           *bap += value;
1241           goto noinsert1;
1242         }
1243       }
1244       N = nrow++ - 1;
1245       high++; /* added new column thus must search to one higher than before */
1246       /* shift up all the later entries in this row */
1247       for (ii=N; ii>=i; ii--) {
1248         rp[ii+1] = rp[ii];
1249         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1250       }
1251       if (N>=i) {
1252         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1253       }
1254       rp[i]                    = bcol;
1255       ap[16*i + 4*cidx + ridx] = value;
1256       noinsert1:;
1257       low = i;
1258     }
1259     ailen[brow] = nrow;
1260   }
1261   PetscFunctionReturnVoid();
1262 }
1263 EXTERN_C_END
1264 
1265 /*
1266      Checks for missing diagonals
1267 */
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1271 {
1272   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1273   PetscErrorCode ierr;
1274   PetscInt       *diag,*jj = a->j,i;
1275 
1276   PetscFunctionBegin;
1277   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1278   *missing = PETSC_FALSE;
1279   if (A->rmap->n > 0 && !jj) {
1280     *missing  = PETSC_TRUE;
1281     if (d) *d = 0;
1282     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1283   } else {
1284     diag     = a->diag;
1285     for (i=0; i<a->mbs; i++) {
1286       if (jj[diag[i]] != i) {
1287         *missing  = PETSC_TRUE;
1288         if (d) *d = i;
1289         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1290       }
1291     }
1292   }
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1299 {
1300   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302   PetscInt       i,j,m = a->mbs;
1303 
1304   PetscFunctionBegin;
1305   if (!a->diag) {
1306     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1307     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1308     a->free_diag = PETSC_TRUE;
1309   }
1310   for (i=0; i<m; i++) {
1311     a->diag[i] = a->i[i+1];
1312     for (j=a->i[i]; j<a->i[i+1]; j++) {
1313       if (a->j[j] == i) {
1314         a->diag[i] = j;
1315         break;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 
1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1332   PetscInt       *tia, *tja;
1333 
1334   PetscFunctionBegin;
1335   *nn = n;
1336   if (!ia) PetscFunctionReturn(0);
1337   if (symmetric) {
1338     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1339     nz   = tia[n];
1340   } else {
1341     tia = a->i; tja = a->j;
1342   }
1343 
1344   if (!blockcompressed && bs > 1) {
1345     (*nn) *= bs;
1346     /* malloc & create the natural set of indices */
1347     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1348     if (n) {
1349       (*ia)[0] = 0;
1350       for (j=1; j<bs; j++) {
1351         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1352       }
1353     }
1354 
1355     for (i=1; i<n; i++) {
1356       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1357       for (j=1; j<bs; j++) {
1358         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1359       }
1360     }
1361     if (n) {
1362       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1363     }
1364 
1365     if (ja) {
1366       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1367       cnt = 0;
1368       for (i=0; i<n; i++) {
1369         for (j=0; j<bs; j++) {
1370           for (k=tia[i]; k<tia[i+1]; k++) {
1371             for (l=0; l<bs; l++) {
1372               (*ja)[cnt++] = bs*tja[k] + l;
1373 	    }
1374           }
1375         }
1376       }
1377     }
1378 
1379     n     *= bs;
1380     nz *= bs*bs;
1381     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1382       ierr = PetscFree(tia);CHKERRQ(ierr);
1383       ierr = PetscFree(tja);CHKERRQ(ierr);
1384     }
1385   } else if (oshift == 1) {
1386     if (symmetric) {
1387       PetscInt nz = tia[A->rmap->n/bs];
1388       /*  add 1 to i and j indices */
1389       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1390       *ia = tia;
1391       if (ja) {
1392 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1393         *ja = tja;
1394       }
1395     } else {
1396       PetscInt nz = a->i[A->rmap->n/bs];
1397       /* malloc space and  add 1 to i and j indices */
1398       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1399       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1400       if (ja) {
1401 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1402 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1403       }
1404     }
1405   } else {
1406     *ia = tia;
1407     if (ja) *ja = tja;
1408   }
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1416 {
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!ia) PetscFunctionReturn(0);
1421   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1422     ierr = PetscFree(*ia);CHKERRQ(ierr);
1423     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436 #if defined(PETSC_USE_LOG)
1437   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1438 #endif
1439   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1440   if (a->row) {
1441     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1442   }
1443   if (a->col) {
1444     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1445   }
1446   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1447   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1448   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1449   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1450   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1451   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1452   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1453   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1454   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1455   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1456 
1457   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1458   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1459   ierr = PetscFree(a);CHKERRQ(ierr);
1460 
1461   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1462   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1466   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1467   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1468   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1475 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1476 {
1477   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   switch (op) {
1482   case MAT_ROW_ORIENTED:
1483     a->roworiented    = flg;
1484     break;
1485   case MAT_KEEP_NONZERO_PATTERN:
1486     a->keepnonzeropattern = flg;
1487     break;
1488   case MAT_NEW_NONZERO_LOCATIONS:
1489     a->nonew          = (flg ? 0 : 1);
1490     break;
1491   case MAT_NEW_NONZERO_LOCATION_ERR:
1492     a->nonew          = (flg ? -1 : 0);
1493     break;
1494   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1495     a->nonew          = (flg ? -2 : 0);
1496     break;
1497   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1498     a->nounused       = (flg ? -1 : 0);
1499     break;
1500   case MAT_CHECK_COMPRESSED_ROW:
1501     a->compressedrow.check = flg;
1502     break;
1503   case MAT_NEW_DIAGONALS:
1504   case MAT_IGNORE_OFF_PROC_ENTRIES:
1505   case MAT_USE_HASH_TABLE:
1506     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1507     break;
1508   case MAT_SYMMETRIC:
1509   case MAT_STRUCTURALLY_SYMMETRIC:
1510   case MAT_HERMITIAN:
1511   case MAT_SYMMETRY_ETERNAL:
1512     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1513     break;
1514   default:
1515     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1516   }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1522 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1523 {
1524   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1525   PetscErrorCode ierr;
1526   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1527   MatScalar      *aa,*aa_i;
1528   PetscScalar    *v_i;
1529 
1530   PetscFunctionBegin;
1531   bs  = A->rmap->bs;
1532   ai  = a->i;
1533   aj  = a->j;
1534   aa  = a->a;
1535   bs2 = a->bs2;
1536 
1537   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1538 
1539   bn  = row/bs;   /* Block number */
1540   bp  = row % bs; /* Block Position */
1541   M   = ai[bn+1] - ai[bn];
1542   *nz = bs*M;
1543 
1544   if (v) {
1545     *v = 0;
1546     if (*nz) {
1547       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1548       for (i=0; i<M; i++) { /* for each block in the block row */
1549         v_i  = *v + i*bs;
1550         aa_i = aa + bs2*(ai[bn] + i);
1551         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1552       }
1553     }
1554   }
1555 
1556   if (idx) {
1557     *idx = 0;
1558     if (*nz) {
1559       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1560       for (i=0; i<M; i++) { /* for each block in the block row */
1561         idx_i = *idx + i*bs;
1562         itmp  = bs*aj[ai[bn] + i];
1563         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1564       }
1565     }
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1572 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1573 {
1574   PetscErrorCode ierr;
1575 
1576   PetscFunctionBegin;
1577   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1578   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1586 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1587 {
1588   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1589   Mat            C;
1590   PetscErrorCode ierr;
1591   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1592   PetscInt       *rows,*cols,bs2=a->bs2;
1593   MatScalar      *array;
1594 
1595   PetscFunctionBegin;
1596   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1597   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1598     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1599     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1600 
1601     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1602     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1603     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1604     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1605     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1606     ierr = PetscFree(col);CHKERRQ(ierr);
1607   } else {
1608     C = *B;
1609   }
1610 
1611   array = a->a;
1612   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1613   for (i=0; i<mbs; i++) {
1614     cols[0] = i*bs;
1615     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1616     len = ai[i+1] - ai[i];
1617     for (j=0; j<len; j++) {
1618       rows[0] = (*aj++)*bs;
1619       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1620       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1621       array += bs2;
1622     }
1623   }
1624   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1625 
1626   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1627   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1628 
1629   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1630     *B = C;
1631   } else {
1632     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1639 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1640 {
1641   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1642   PetscErrorCode ierr;
1643   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1644   int            fd;
1645   PetscScalar    *aa;
1646   FILE           *file;
1647 
1648   PetscFunctionBegin;
1649   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1650   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1651   col_lens[0] = MAT_FILE_CLASSID;
1652 
1653   col_lens[1] = A->rmap->N;
1654   col_lens[2] = A->cmap->n;
1655   col_lens[3] = a->nz*bs2;
1656 
1657   /* store lengths of each row and write (including header) to file */
1658   count = 0;
1659   for (i=0; i<a->mbs; i++) {
1660     for (j=0; j<bs; j++) {
1661       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1662     }
1663   }
1664   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1665   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1666 
1667   /* store column indices (zero start index) */
1668   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1669   count = 0;
1670   for (i=0; i<a->mbs; i++) {
1671     for (j=0; j<bs; j++) {
1672       for (k=a->i[i]; k<a->i[i+1]; k++) {
1673         for (l=0; l<bs; l++) {
1674           jj[count++] = bs*a->j[k] + l;
1675         }
1676       }
1677     }
1678   }
1679   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1680   ierr = PetscFree(jj);CHKERRQ(ierr);
1681 
1682   /* store nonzero values */
1683   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1684   count = 0;
1685   for (i=0; i<a->mbs; i++) {
1686     for (j=0; j<bs; j++) {
1687       for (k=a->i[i]; k<a->i[i+1]; k++) {
1688         for (l=0; l<bs; l++) {
1689           aa[count++] = a->a[bs2*k + l*bs + j];
1690         }
1691       }
1692     }
1693   }
1694   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1695   ierr = PetscFree(aa);CHKERRQ(ierr);
1696 
1697   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1698   if (file) {
1699     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1700   }
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1706 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1707 {
1708   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1709   PetscErrorCode    ierr;
1710   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1711   PetscViewerFormat format;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1715   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1716     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1717   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1718     Mat aij;
1719     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1720     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1721     ierr = MatDestroy(aij);CHKERRQ(ierr);
1722   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1723      PetscFunctionReturn(0);
1724   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1725     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1726     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1727     for (i=0; i<a->mbs; i++) {
1728       for (j=0; j<bs; j++) {
1729         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1730         for (k=a->i[i]; k<a->i[i+1]; k++) {
1731           for (l=0; l<bs; l++) {
1732 #if defined(PETSC_USE_COMPLEX)
1733             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1734               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1735                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1736             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1738                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1739             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1741             }
1742 #else
1743             if (a->a[bs2*k + l*bs + j] != 0.0) {
1744               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1745             }
1746 #endif
1747           }
1748         }
1749         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1750       }
1751     }
1752     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1753   } else {
1754     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1755     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1756     for (i=0; i<a->mbs; i++) {
1757       for (j=0; j<bs; j++) {
1758         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1759         for (k=a->i[i]; k<a->i[i+1]; k++) {
1760           for (l=0; l<bs; l++) {
1761 #if defined(PETSC_USE_COMPLEX)
1762             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1763               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1764                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1765             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1766               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1767                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1768             } else {
1769               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1770             }
1771 #else
1772             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1773 #endif
1774           }
1775         }
1776         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1777       }
1778     }
1779     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1780   }
1781   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1787 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1788 {
1789   Mat            A = (Mat) Aa;
1790   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1791   PetscErrorCode ierr;
1792   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1793   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1794   MatScalar      *aa;
1795   PetscViewer    viewer;
1796 
1797   PetscFunctionBegin;
1798 
1799   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1800   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1801 
1802   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1803 
1804   /* loop over matrix elements drawing boxes */
1805   color = PETSC_DRAW_BLUE;
1806   for (i=0,row=0; i<mbs; i++,row+=bs) {
1807     for (j=a->i[i]; j<a->i[i+1]; j++) {
1808       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1809       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1810       aa = a->a + j*bs2;
1811       for (k=0; k<bs; k++) {
1812         for (l=0; l<bs; l++) {
1813           if (PetscRealPart(*aa++) >=  0.) continue;
1814           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1815         }
1816       }
1817     }
1818   }
1819   color = PETSC_DRAW_CYAN;
1820   for (i=0,row=0; i<mbs; i++,row+=bs) {
1821     for (j=a->i[i]; j<a->i[i+1]; j++) {
1822       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1823       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1824       aa = a->a + j*bs2;
1825       for (k=0; k<bs; k++) {
1826         for (l=0; l<bs; l++) {
1827           if (PetscRealPart(*aa++) != 0.) continue;
1828           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1829         }
1830       }
1831     }
1832   }
1833 
1834   color = PETSC_DRAW_RED;
1835   for (i=0,row=0; i<mbs; i++,row+=bs) {
1836     for (j=a->i[i]; j<a->i[i+1]; j++) {
1837       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1838       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1839       aa = a->a + j*bs2;
1840       for (k=0; k<bs; k++) {
1841         for (l=0; l<bs; l++) {
1842           if (PetscRealPart(*aa++) <= 0.) continue;
1843           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1844         }
1845       }
1846     }
1847   }
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1853 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1854 {
1855   PetscErrorCode ierr;
1856   PetscReal      xl,yl,xr,yr,w,h;
1857   PetscDraw      draw;
1858   PetscBool      isnull;
1859 
1860   PetscFunctionBegin;
1861 
1862   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1863   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1864 
1865   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1866   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1867   xr += w;    yr += h;  xl = -w;     yl = -h;
1868   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1869   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1870   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatView_SeqBAIJ"
1876 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1877 {
1878   PetscErrorCode ierr;
1879   PetscBool      iascii,isbinary,isdraw;
1880 
1881   PetscFunctionBegin;
1882   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1883   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1884   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1885   if (iascii){
1886     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1887   } else if (isbinary) {
1888     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1889   } else if (isdraw) {
1890     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1891   } else {
1892     Mat B;
1893     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1894     ierr = MatView(B,viewer);CHKERRQ(ierr);
1895     ierr = MatDestroy(B);CHKERRQ(ierr);
1896   }
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1903 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1904 {
1905   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1906   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1907   PetscInt    *ai = a->i,*ailen = a->ilen;
1908   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1909   MatScalar   *ap,*aa = a->a;
1910 
1911   PetscFunctionBegin;
1912   for (k=0; k<m; k++) { /* loop over rows */
1913     row  = im[k]; brow = row/bs;
1914     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1915     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1916     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1917     nrow = ailen[brow];
1918     for (l=0; l<n; l++) { /* loop over columns */
1919       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1920       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1921       col  = in[l] ;
1922       bcol = col/bs;
1923       cidx = col%bs;
1924       ridx = row%bs;
1925       high = nrow;
1926       low  = 0; /* assume unsorted */
1927       while (high-low > 5) {
1928         t = (low+high)/2;
1929         if (rp[t] > bcol) high = t;
1930         else             low  = t;
1931       }
1932       for (i=low; i<high; i++) {
1933         if (rp[i] > bcol) break;
1934         if (rp[i] == bcol) {
1935           *v++ = ap[bs2*i+bs*cidx+ridx];
1936           goto finished;
1937         }
1938       }
1939       *v++ = 0.0;
1940       finished:;
1941     }
1942   }
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1948 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1949 {
1950   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1951   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1952   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1953   PetscErrorCode    ierr;
1954   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1955   PetscBool         roworiented=a->roworiented;
1956   const PetscScalar *value = v;
1957   MatScalar         *ap,*aa = a->a,*bap;
1958 
1959   PetscFunctionBegin;
1960   if (roworiented) {
1961     stepval = (n-1)*bs;
1962   } else {
1963     stepval = (m-1)*bs;
1964   }
1965   for (k=0; k<m; k++) { /* loop over added rows */
1966     row  = im[k];
1967     if (row < 0) continue;
1968 #if defined(PETSC_USE_DEBUG)
1969     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1970 #endif
1971     rp   = aj + ai[row];
1972     ap   = aa + bs2*ai[row];
1973     rmax = imax[row];
1974     nrow = ailen[row];
1975     low  = 0;
1976     high = nrow;
1977     for (l=0; l<n; l++) { /* loop over added columns */
1978       if (in[l] < 0) continue;
1979 #if defined(PETSC_USE_DEBUG)
1980       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1981 #endif
1982       col = in[l];
1983       if (roworiented) {
1984         value = v + (k*(stepval+bs) + l)*bs;
1985       } else {
1986         value = v + (l*(stepval+bs) + k)*bs;
1987       }
1988       if (col <= lastcol) low = 0; else high = nrow;
1989       lastcol = col;
1990       while (high-low > 7) {
1991         t = (low+high)/2;
1992         if (rp[t] > col) high = t;
1993         else             low  = t;
1994       }
1995       for (i=low; i<high; i++) {
1996         if (rp[i] > col) break;
1997         if (rp[i] == col) {
1998           bap  = ap +  bs2*i;
1999           if (roworiented) {
2000             if (is == ADD_VALUES) {
2001               for (ii=0; ii<bs; ii++,value+=stepval) {
2002                 for (jj=ii; jj<bs2; jj+=bs) {
2003                   bap[jj] += *value++;
2004                 }
2005               }
2006             } else {
2007               for (ii=0; ii<bs; ii++,value+=stepval) {
2008                 for (jj=ii; jj<bs2; jj+=bs) {
2009                   bap[jj] = *value++;
2010                 }
2011               }
2012             }
2013           } else {
2014             if (is == ADD_VALUES) {
2015               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2016                 for (jj=0; jj<bs; jj++) {
2017                   bap[jj] += value[jj];
2018                 }
2019                 bap += bs;
2020               }
2021             } else {
2022               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2023                 for (jj=0; jj<bs; jj++) {
2024                   bap[jj]  = value[jj];
2025                 }
2026                 bap += bs;
2027               }
2028             }
2029           }
2030           goto noinsert2;
2031         }
2032       }
2033       if (nonew == 1) goto noinsert2;
2034       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2035       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2036       N = nrow++ - 1; high++;
2037       /* shift up all the later entries in this row */
2038       for (ii=N; ii>=i; ii--) {
2039         rp[ii+1] = rp[ii];
2040         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2041       }
2042       if (N >= i) {
2043         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2044       }
2045       rp[i] = col;
2046       bap   = ap +  bs2*i;
2047       if (roworiented) {
2048         for (ii=0; ii<bs; ii++,value+=stepval) {
2049           for (jj=ii; jj<bs2; jj+=bs) {
2050             bap[jj] = *value++;
2051           }
2052         }
2053       } else {
2054         for (ii=0; ii<bs; ii++,value+=stepval) {
2055           for (jj=0; jj<bs; jj++) {
2056             *bap++  = *value++;
2057           }
2058         }
2059       }
2060       noinsert2:;
2061       low = i;
2062     }
2063     ailen[row] = nrow;
2064   }
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2070 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2071 {
2072   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2073   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2074   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2075   PetscErrorCode ierr;
2076   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2077   MatScalar      *aa = a->a,*ap;
2078   PetscReal      ratio=0.6;
2079 
2080   PetscFunctionBegin;
2081   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2082 
2083   if (m) rmax = ailen[0];
2084   for (i=1; i<mbs; i++) {
2085     /* move each row back by the amount of empty slots (fshift) before it*/
2086     fshift += imax[i-1] - ailen[i-1];
2087     rmax   = PetscMax(rmax,ailen[i]);
2088     if (fshift) {
2089       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2090       N = ailen[i];
2091       for (j=0; j<N; j++) {
2092         ip[j-fshift] = ip[j];
2093         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2094       }
2095     }
2096     ai[i] = ai[i-1] + ailen[i-1];
2097   }
2098   if (mbs) {
2099     fshift += imax[mbs-1] - ailen[mbs-1];
2100     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2101   }
2102   /* reset ilen and imax for each row */
2103   for (i=0; i<mbs; i++) {
2104     ailen[i] = imax[i] = ai[i+1] - ai[i];
2105   }
2106   a->nz = ai[mbs];
2107 
2108   /* diagonals may have moved, so kill the diagonal pointers */
2109   a->idiagvalid = PETSC_FALSE;
2110   if (fshift && a->diag) {
2111     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2112     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2113     a->diag = 0;
2114   }
2115   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2116   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2117   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2118   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2119   A->info.mallocs     += a->reallocs;
2120   a->reallocs          = 0;
2121   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2122 
2123   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2124   A->same_nonzero = PETSC_TRUE;
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 /*
2129    This function returns an array of flags which indicate the locations of contiguous
2130    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2131    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2132    Assume: sizes should be long enough to hold all the values.
2133 */
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2136 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2137 {
2138   PetscInt   i,j,k,row;
2139   PetscBool  flg;
2140 
2141   PetscFunctionBegin;
2142   for (i=0,j=0; i<n; j++) {
2143     row = idx[i];
2144     if (row%bs!=0) { /* Not the begining of a block */
2145       sizes[j] = 1;
2146       i++;
2147     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2148       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2149       i++;
2150     } else { /* Begining of the block, so check if the complete block exists */
2151       flg = PETSC_TRUE;
2152       for (k=1; k<bs; k++) {
2153         if (row+k != idx[i+k]) { /* break in the block */
2154           flg = PETSC_FALSE;
2155           break;
2156         }
2157       }
2158       if (flg) { /* No break in the bs */
2159         sizes[j] = bs;
2160         i+= bs;
2161       } else {
2162         sizes[j] = 1;
2163         i++;
2164       }
2165     }
2166   }
2167   *bs_max = j;
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2173 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2174 {
2175   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2176   PetscErrorCode    ierr;
2177   PetscInt          i,j,k,count,*rows;
2178   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2179   PetscScalar       zero = 0.0;
2180   MatScalar         *aa;
2181   const PetscScalar *xx;
2182   PetscScalar       *bb;
2183 
2184   PetscFunctionBegin;
2185   /* fix right hand side if needed */
2186   if (x && b) {
2187     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2188     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2189     for (i=0; i<is_n; i++) {
2190       bb[is_idx[i]] = diag*xx[is_idx[i]];
2191     }
2192     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2193     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2194   }
2195 
2196   /* Make a copy of the IS and  sort it */
2197   /* allocate memory for rows,sizes */
2198   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2199 
2200   /* copy IS values to rows, and sort them */
2201   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2202   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2203 
2204   if (baij->keepnonzeropattern) {
2205     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2206     bs_max = is_n;
2207     A->same_nonzero = PETSC_TRUE;
2208   } else {
2209     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2210     A->same_nonzero = PETSC_FALSE;
2211   }
2212 
2213   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2214     row   = rows[j];
2215     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2216     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2217     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2218     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2219       if (diag != 0.0) {
2220         if (baij->ilen[row/bs] > 0) {
2221           baij->ilen[row/bs]       = 1;
2222           baij->j[baij->i[row/bs]] = row/bs;
2223           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2224         }
2225         /* Now insert all the diagonal values for this bs */
2226         for (k=0; k<bs; k++) {
2227           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2228         }
2229       } else { /* (diag == 0.0) */
2230         baij->ilen[row/bs] = 0;
2231       } /* end (diag == 0.0) */
2232     } else { /* (sizes[i] != bs) */
2233 #if defined (PETSC_USE_DEBUG)
2234       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2235 #endif
2236       for (k=0; k<count; k++) {
2237         aa[0] =  zero;
2238         aa    += bs;
2239       }
2240       if (diag != 0.0) {
2241         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2242       }
2243     }
2244   }
2245 
2246   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2247   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2253 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2254 {
2255   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2256   PetscErrorCode    ierr;
2257   PetscInt          i,j,k,count;
2258   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2259   PetscScalar       zero = 0.0;
2260   MatScalar         *aa;
2261   const PetscScalar *xx;
2262   PetscScalar       *bb;
2263   PetscBool         *zeroed,vecs = PETSC_FALSE;
2264 
2265   PetscFunctionBegin;
2266   /* fix right hand side if needed */
2267   if (x && b) {
2268     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2269     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2270     vecs = PETSC_TRUE;
2271   }
2272   A->same_nonzero = PETSC_TRUE;
2273 
2274   /* zero the columns */
2275   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2276   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2277   for (i=0; i<is_n; i++) {
2278     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2279     zeroed[is_idx[i]] = PETSC_TRUE;
2280   }
2281   for (i=0; i<A->rmap->N; i++) {
2282     if (!zeroed[i]) {
2283       row = i/bs;
2284       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2285         for (k=0; k<bs; k++) {
2286           col = bs*baij->j[j] + k;
2287 	  if (zeroed[col]) {
2288 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2289             if (vecs) bb[i] -= aa[0]*xx[col];
2290             aa[0] = 0.0;
2291           }
2292         }
2293       }
2294     } else if (vecs) bb[i] = diag*xx[i];
2295   }
2296   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2297   if (vecs) {
2298     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2299     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2300   }
2301 
2302   /* zero the rows */
2303   for (i=0; i<is_n; i++) {
2304     row   = is_idx[i];
2305     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2306     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2307     for (k=0; k<count; k++) {
2308       aa[0] =  zero;
2309       aa    += bs;
2310     }
2311     if (diag != 0.0) {
2312       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2313     }
2314   }
2315   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2321 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2322 {
2323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2324   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2325   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2326   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2327   PetscErrorCode ierr;
2328   PetscInt       ridx,cidx,bs2=a->bs2;
2329   PetscBool      roworiented=a->roworiented;
2330   MatScalar      *ap,value,*aa=a->a,*bap;
2331 
2332   PetscFunctionBegin;
2333   if (v) PetscValidScalarPointer(v,6);
2334   for (k=0; k<m; k++) { /* loop over added rows */
2335     row  = im[k];
2336     brow = row/bs;
2337     if (row < 0) continue;
2338 #if defined(PETSC_USE_DEBUG)
2339     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2340 #endif
2341     rp   = aj + ai[brow];
2342     ap   = aa + bs2*ai[brow];
2343     rmax = imax[brow];
2344     nrow = ailen[brow];
2345     low  = 0;
2346     high = nrow;
2347     for (l=0; l<n; l++) { /* loop over added columns */
2348       if (in[l] < 0) continue;
2349 #if defined(PETSC_USE_DEBUG)
2350       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2351 #endif
2352       col = in[l]; bcol = col/bs;
2353       ridx = row % bs; cidx = col % bs;
2354       if (roworiented) {
2355         value = v[l + k*n];
2356       } else {
2357         value = v[k + l*m];
2358       }
2359       if (col <= lastcol) low = 0; else high = nrow;
2360       lastcol = col;
2361       while (high-low > 7) {
2362         t = (low+high)/2;
2363         if (rp[t] > bcol) high = t;
2364         else              low  = t;
2365       }
2366       for (i=low; i<high; i++) {
2367         if (rp[i] > bcol) break;
2368         if (rp[i] == bcol) {
2369           bap  = ap +  bs2*i + bs*cidx + ridx;
2370           if (is == ADD_VALUES) *bap += value;
2371           else                  *bap  = value;
2372           goto noinsert1;
2373         }
2374       }
2375       if (nonew == 1) goto noinsert1;
2376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2377       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2378       N = nrow++ - 1; high++;
2379       /* shift up all the later entries in this row */
2380       for (ii=N; ii>=i; ii--) {
2381         rp[ii+1] = rp[ii];
2382         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2383       }
2384       if (N>=i) {
2385         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2386       }
2387       rp[i]                      = bcol;
2388       ap[bs2*i + bs*cidx + ridx] = value;
2389       a->nz++;
2390       noinsert1:;
2391       low = i;
2392     }
2393     ailen[brow] = nrow;
2394   }
2395   A->same_nonzero = PETSC_FALSE;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2401 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2402 {
2403   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2404   Mat            outA;
2405   PetscErrorCode ierr;
2406   PetscBool      row_identity,col_identity;
2407 
2408   PetscFunctionBegin;
2409   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2410   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2411   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2412   if (!row_identity || !col_identity) {
2413     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2414   }
2415 
2416   outA            = inA;
2417   inA->factortype = MAT_FACTOR_LU;
2418 
2419   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2420 
2421   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2422   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2423   a->row = row;
2424   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2425   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2426   a->col = col;
2427 
2428   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2429   if (a->icol) {
2430     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2431   }
2432   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2433   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2434 
2435   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2436   if (!a->solve_work) {
2437     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2438     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2439   }
2440   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2441 
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 EXTERN_C_BEGIN
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2448 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2449 {
2450   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2451   PetscInt    i,nz,mbs;
2452 
2453   PetscFunctionBegin;
2454   nz  = baij->maxnz;
2455   mbs = baij->mbs;
2456   for (i=0; i<nz; i++) {
2457     baij->j[i] = indices[i];
2458   }
2459   baij->nz = nz;
2460   for (i=0; i<mbs; i++) {
2461     baij->ilen[i] = baij->imax[i];
2462   }
2463   PetscFunctionReturn(0);
2464 }
2465 EXTERN_C_END
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2469 /*@
2470     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2471        in the matrix.
2472 
2473   Input Parameters:
2474 +  mat - the SeqBAIJ matrix
2475 -  indices - the column indices
2476 
2477   Level: advanced
2478 
2479   Notes:
2480     This can be called if you have precomputed the nonzero structure of the
2481   matrix and want to provide it to the matrix object to improve the performance
2482   of the MatSetValues() operation.
2483 
2484     You MUST have set the correct numbers of nonzeros per row in the call to
2485   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2486 
2487     MUST be called before any calls to MatSetValues();
2488 
2489 @*/
2490 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2491 {
2492   PetscErrorCode ierr;
2493 
2494   PetscFunctionBegin;
2495   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2496   PetscValidPointer(indices,2);
2497   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2503 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2504 {
2505   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2506   PetscErrorCode ierr;
2507   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2508   PetscReal      atmp;
2509   PetscScalar    *x,zero = 0.0;
2510   MatScalar      *aa;
2511   PetscInt       ncols,brow,krow,kcol;
2512 
2513   PetscFunctionBegin;
2514   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2515   bs   = A->rmap->bs;
2516   aa   = a->a;
2517   ai   = a->i;
2518   aj   = a->j;
2519   mbs  = a->mbs;
2520 
2521   ierr = VecSet(v,zero);CHKERRQ(ierr);
2522   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2523   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2524   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2525   for (i=0; i<mbs; i++) {
2526     ncols = ai[1] - ai[0]; ai++;
2527     brow  = bs*i;
2528     for (j=0; j<ncols; j++){
2529       for (kcol=0; kcol<bs; kcol++){
2530         for (krow=0; krow<bs; krow++){
2531           atmp = PetscAbsScalar(*aa);aa++;
2532           row = brow + krow;    /* row index */
2533           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2534           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2535         }
2536       }
2537       aj++;
2538     }
2539   }
2540   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatCopy_SeqBAIJ"
2546 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2547 {
2548   PetscErrorCode ierr;
2549 
2550   PetscFunctionBegin;
2551   /* If the two matrices have the same copy implementation, use fast copy. */
2552   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2553     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2554     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2555     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2556 
2557     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2558     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2559     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2560   } else {
2561     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2562   }
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2568 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2569 {
2570   PetscErrorCode ierr;
2571 
2572   PetscFunctionBegin;
2573   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 #undef __FUNCT__
2578 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2579 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2580 {
2581   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2582   PetscFunctionBegin;
2583   *array = a->a;
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2589 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2590 {
2591   PetscFunctionBegin;
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2597 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2598 {
2599   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2600   PetscErrorCode ierr;
2601   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2602   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2603 
2604   PetscFunctionBegin;
2605   if (str == SAME_NONZERO_PATTERN) {
2606     PetscScalar alpha = a;
2607     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2608   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2609     if (y->xtoy && y->XtoY != X) {
2610       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2611       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2612     }
2613     if (!y->xtoy) { /* get xtoy */
2614       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2615       y->XtoY = X;
2616       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2617     }
2618     bs2 = bs*bs;
2619     for (i=0; i<x->nz; i++) {
2620       j = 0;
2621       while (j < bs2){
2622         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2623         j++;
2624       }
2625     }
2626     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2627   } else {
2628     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2629   }
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2635 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2636 {
2637   PetscInt rbs,cbs;
2638   PetscErrorCode ierr;
2639 
2640   PetscFunctionBegin;
2641   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2642   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2643   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2644   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2650 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2651 {
2652   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2653   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2654   MatScalar      *aa = a->a;
2655 
2656   PetscFunctionBegin;
2657   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2663 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2664 {
2665   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2666   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2667   MatScalar      *aa = a->a;
2668 
2669   PetscFunctionBegin;
2670   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2675 
2676 #undef __FUNCT__
2677 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2678 /*
2679     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2680 */
2681 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2682 {
2683   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2684   PetscErrorCode ierr;
2685   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2686   PetscInt       nz = a->i[m],row,*jj,mr,col;
2687 
2688   PetscFunctionBegin;
2689   *nn = n;
2690   if (!ia) PetscFunctionReturn(0);
2691   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2692   else {
2693     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2694     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2695     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2696     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2697     jj = a->j;
2698     for (i=0; i<nz; i++) {
2699       collengths[jj[i]]++;
2700     }
2701     cia[0] = oshift;
2702     for (i=0; i<n; i++) {
2703       cia[i+1] = cia[i] + collengths[i];
2704     }
2705     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2706     jj   = a->j;
2707     for (row=0; row<m; row++) {
2708       mr = a->i[row+1] - a->i[row];
2709       for (i=0; i<mr; i++) {
2710         col = *jj++;
2711         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2712       }
2713     }
2714     ierr = PetscFree(collengths);CHKERRQ(ierr);
2715     *ia = cia; *ja = cja;
2716   }
2717   PetscFunctionReturn(0);
2718 }
2719 
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2722 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2723 {
2724   PetscErrorCode ierr;
2725 
2726   PetscFunctionBegin;
2727   if (!ia) PetscFunctionReturn(0);
2728   ierr = PetscFree(*ia);CHKERRQ(ierr);
2729   ierr = PetscFree(*ja);CHKERRQ(ierr);
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2735 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2736 {
2737   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2738   PetscErrorCode ierr;
2739   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2740   PetscScalar    dx,*y,*xx,*w3_array;
2741   PetscScalar    *vscale_array;
2742   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2743   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2744   void           *fctx = coloring->fctx;
2745   PetscBool      flg = PETSC_FALSE;
2746   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2747   Vec            x1_tmp;
2748 
2749   PetscFunctionBegin;
2750   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2751   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2752   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2753   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2754 
2755   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2756   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2757   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2758   if (flg) {
2759     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2760   } else {
2761     PetscBool  assembled;
2762     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2763     if (assembled) {
2764       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2765     }
2766   }
2767 
2768   x1_tmp = x1;
2769   if (!coloring->vscale){
2770     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2771   }
2772 
2773   /*
2774     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2775     coloring->F for the coarser grids from the finest
2776   */
2777   if (coloring->F) {
2778     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2779     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2780     if (m1 != m2) {
2781       coloring->F = 0;
2782       }
2783     }
2784 
2785   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2786     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2787   }
2788   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2789 
2790   /* Set w1 = F(x1) */
2791   if (coloring->F) {
2792     w1          = coloring->F; /* use already computed value of function */
2793     coloring->F = 0;
2794   } else {
2795     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2796     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2797     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2798   }
2799 
2800   if (!coloring->w3) {
2801     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2802     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2803   }
2804   w3 = coloring->w3;
2805 
2806     CHKMEMQ;
2807     /* Compute all the local scale factors, including ghost points */
2808   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2809   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2810   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2811   if (ctype == IS_COLORING_GHOSTED){
2812     col_start = 0; col_end = N;
2813   } else if (ctype == IS_COLORING_GLOBAL){
2814     xx = xx - start;
2815     vscale_array = vscale_array - start;
2816     col_start = start; col_end = N + start;
2817   }    CHKMEMQ;
2818   for (col=col_start; col<col_end; col++){
2819     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2820     if (coloring->htype[0] == 'w') {
2821       dx = 1.0 + unorm;
2822     } else {
2823       dx  = xx[col];
2824     }
2825     if (dx == 0.0) dx = 1.0;
2826 #if !defined(PETSC_USE_COMPLEX)
2827     if (dx < umin && dx >= 0.0)      dx = umin;
2828     else if (dx < 0.0 && dx > -umin) dx = -umin;
2829 #else
2830     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2831     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2832 #endif
2833     dx               *= epsilon;
2834     vscale_array[col] = 1.0/dx;
2835   }     CHKMEMQ;
2836   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2837   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2838   if (ctype == IS_COLORING_GLOBAL){
2839     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2840     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2841   }
2842   CHKMEMQ;
2843   if (coloring->vscaleforrow) {
2844     vscaleforrow = coloring->vscaleforrow;
2845   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2846 
2847   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2848   /*
2849     Loop over each color
2850   */
2851   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2852   for (k=0; k<coloring->ncolors; k++) {
2853     coloring->currentcolor = k;
2854     for (i=0; i<bs; i++) {
2855       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2856       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2857       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2858       /*
2859 	Loop over each column associated with color
2860 	adding the perturbation to the vector w3.
2861       */
2862       for (l=0; l<coloring->ncolumns[k]; l++) {
2863 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2864 	if (coloring->htype[0] == 'w') {
2865 	  dx = 1.0 + unorm;
2866 	} else {
2867 	  dx  = xx[col];
2868 	}
2869 	if (dx == 0.0) dx = 1.0;
2870 #if !defined(PETSC_USE_COMPLEX)
2871 	if (dx < umin && dx >= 0.0)      dx = umin;
2872 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2873 #else
2874 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2875 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2876 #endif
2877 	dx            *= epsilon;
2878 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2879 	w3_array[col] += dx;
2880       }
2881       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2882       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2883 
2884       /*
2885 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2886 	w2 = F(x1 + dx) - F(x1)
2887       */
2888       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2889       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2890       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2891       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2892 
2893       /*
2894 	Loop over rows of vector, putting results into Jacobian matrix
2895       */
2896       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2897       for (l=0; l<coloring->nrows[k]; l++) {
2898 	row    = bs*coloring->rows[k][l];             /* local row index */
2899 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2900         for (j=0; j<bs; j++) {
2901   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2902           srows[j]  = row + start + j;
2903         }
2904 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2905       }
2906       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2907     }
2908   } /* endof for each color */
2909   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2910   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2911   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2912   ierr = PetscFree(srows);CHKERRQ(ierr);
2913 
2914   coloring->currentcolor = -1;
2915   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2916   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2917   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 /* -------------------------------------------------------------------*/
2922 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2923        MatGetRow_SeqBAIJ,
2924        MatRestoreRow_SeqBAIJ,
2925        MatMult_SeqBAIJ_N,
2926 /* 4*/ MatMultAdd_SeqBAIJ_N,
2927        MatMultTranspose_SeqBAIJ,
2928        MatMultTransposeAdd_SeqBAIJ,
2929        0,
2930        0,
2931        0,
2932 /*10*/ 0,
2933        MatLUFactor_SeqBAIJ,
2934        0,
2935        0,
2936        MatTranspose_SeqBAIJ,
2937 /*15*/ MatGetInfo_SeqBAIJ,
2938        MatEqual_SeqBAIJ,
2939        MatGetDiagonal_SeqBAIJ,
2940        MatDiagonalScale_SeqBAIJ,
2941        MatNorm_SeqBAIJ,
2942 /*20*/ 0,
2943        MatAssemblyEnd_SeqBAIJ,
2944        MatSetOption_SeqBAIJ,
2945        MatZeroEntries_SeqBAIJ,
2946 /*24*/ MatZeroRows_SeqBAIJ,
2947        0,
2948        0,
2949        0,
2950        0,
2951 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2952        0,
2953        0,
2954        MatGetArray_SeqBAIJ,
2955        MatRestoreArray_SeqBAIJ,
2956 /*34*/ MatDuplicate_SeqBAIJ,
2957        0,
2958        0,
2959        MatILUFactor_SeqBAIJ,
2960        0,
2961 /*39*/ MatAXPY_SeqBAIJ,
2962        MatGetSubMatrices_SeqBAIJ,
2963        MatIncreaseOverlap_SeqBAIJ,
2964        MatGetValues_SeqBAIJ,
2965        MatCopy_SeqBAIJ,
2966 /*44*/ 0,
2967        MatScale_SeqBAIJ,
2968        0,
2969        0,
2970        MatZeroRowsColumns_SeqBAIJ,
2971 /*49*/ MatSetBlockSize_SeqBAIJ,
2972        MatGetRowIJ_SeqBAIJ,
2973        MatRestoreRowIJ_SeqBAIJ,
2974        MatGetColumnIJ_SeqBAIJ,
2975        MatRestoreColumnIJ_SeqBAIJ,
2976 /*54*/ MatFDColoringCreate_SeqAIJ,
2977        0,
2978        0,
2979        0,
2980        MatSetValuesBlocked_SeqBAIJ,
2981 /*59*/ MatGetSubMatrix_SeqBAIJ,
2982        MatDestroy_SeqBAIJ,
2983        MatView_SeqBAIJ,
2984        0,
2985        0,
2986 /*64*/ 0,
2987        0,
2988        0,
2989        0,
2990        0,
2991 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2992        0,
2993        MatConvert_Basic,
2994        0,
2995        0,
2996 /*74*/ 0,
2997        MatFDColoringApply_BAIJ,
2998        0,
2999        0,
3000        0,
3001 /*79*/ 0,
3002        0,
3003        0,
3004        0,
3005        MatLoad_SeqBAIJ,
3006 /*84*/ 0,
3007        0,
3008        0,
3009        0,
3010        0,
3011 /*89*/ 0,
3012        0,
3013        0,
3014        0,
3015        0,
3016 /*94*/ 0,
3017        0,
3018        0,
3019        0,
3020        0,
3021 /*99*/0,
3022        0,
3023        0,
3024        0,
3025        0,
3026 /*104*/0,
3027        MatRealPart_SeqBAIJ,
3028        MatImaginaryPart_SeqBAIJ,
3029        0,
3030        0,
3031 /*109*/0,
3032        0,
3033        0,
3034        0,
3035        MatMissingDiagonal_SeqBAIJ,
3036 /*114*/0,
3037        0,
3038        0,
3039        0,
3040        0,
3041 /*119*/0,
3042        0,
3043        MatMultHermitianTranspose_SeqBAIJ,
3044        MatMultHermitianTransposeAdd_SeqBAIJ,
3045        0,
3046 };
3047 
3048 EXTERN_C_BEGIN
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3051 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3052 {
3053   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3054   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3055   PetscErrorCode ierr;
3056 
3057   PetscFunctionBegin;
3058   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3059 
3060   /* allocate space for values if not already there */
3061   if (!aij->saved_values) {
3062     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3063     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3064   }
3065 
3066   /* copy values over */
3067   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 EXTERN_C_END
3071 
3072 EXTERN_C_BEGIN
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3075 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3076 {
3077   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3078   PetscErrorCode ierr;
3079   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3080 
3081   PetscFunctionBegin;
3082   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3083   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3084 
3085   /* copy values over */
3086   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3087   PetscFunctionReturn(0);
3088 }
3089 EXTERN_C_END
3090 
3091 EXTERN_C_BEGIN
3092 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3093 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3094 EXTERN_C_END
3095 
3096 EXTERN_C_BEGIN
3097 #undef __FUNCT__
3098 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3099 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3100 {
3101   Mat_SeqBAIJ    *b;
3102   PetscErrorCode ierr;
3103   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3104   PetscBool      flg,skipallocation = PETSC_FALSE;
3105 
3106   PetscFunctionBegin;
3107 
3108   if (nz == MAT_SKIP_ALLOCATION) {
3109     skipallocation = PETSC_TRUE;
3110     nz             = 0;
3111   }
3112 
3113   if (bs < 0) {
3114     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3115       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3116     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3117     bs   = PetscAbs(bs);
3118   }
3119   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3120   bs   = newbs;
3121 
3122   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3123   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3124   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3125   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3126 
3127   B->preallocated = PETSC_TRUE;
3128 
3129   mbs  = B->rmap->n/bs;
3130   nbs  = B->cmap->n/bs;
3131   bs2  = bs*bs;
3132 
3133   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3134 
3135   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3136   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3137   if (nnz) {
3138     for (i=0; i<mbs; i++) {
3139       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3140       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3141     }
3142   }
3143 
3144   b       = (Mat_SeqBAIJ*)B->data;
3145   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3146     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3147   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3148 
3149   if (!flg) {
3150     switch (bs) {
3151     case 1:
3152       B->ops->mult            = MatMult_SeqBAIJ_1;
3153       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3154       B->ops->sor             = MatSOR_SeqBAIJ_1;
3155       break;
3156     case 2:
3157       B->ops->mult            = MatMult_SeqBAIJ_2;
3158       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3159       B->ops->sor             = MatSOR_SeqBAIJ_2;
3160       break;
3161     case 3:
3162       B->ops->mult            = MatMult_SeqBAIJ_3;
3163       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3164       B->ops->sor             = MatSOR_SeqBAIJ_3;
3165       break;
3166     case 4:
3167       B->ops->mult            = MatMult_SeqBAIJ_4;
3168       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3169       B->ops->sor             = MatSOR_SeqBAIJ_4;
3170       break;
3171     case 5:
3172       B->ops->mult            = MatMult_SeqBAIJ_5;
3173       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3174       B->ops->sor             = MatSOR_SeqBAIJ_5;
3175       break;
3176     case 6:
3177       B->ops->mult            = MatMult_SeqBAIJ_6;
3178       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3179       B->ops->sor             = MatSOR_SeqBAIJ_6;
3180       break;
3181     case 7:
3182       B->ops->mult            = MatMult_SeqBAIJ_7;
3183       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3184       B->ops->sor             = MatSOR_SeqBAIJ_7;
3185       break;
3186     case 15:
3187       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3188       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3189       B->ops->sor             = MatSOR_SeqBAIJ_N;
3190       break;
3191     default:
3192       B->ops->mult            = MatMult_SeqBAIJ_N;
3193       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3194       B->ops->sor             = MatSOR_SeqBAIJ_N;
3195       break;
3196     }
3197   }
3198   B->rmap->bs  = bs;
3199   b->mbs       = mbs;
3200   b->nbs       = nbs;
3201   if (!skipallocation) {
3202     if (!b->imax) {
3203       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3204       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3205       b->free_imax_ilen = PETSC_TRUE;
3206     }
3207     /* b->ilen will count nonzeros in each block row so far. */
3208     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3209     if (!nnz) {
3210       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3211       else if (nz < 0) nz = 1;
3212       for (i=0; i<mbs; i++) b->imax[i] = nz;
3213       nz = nz*mbs;
3214     } else {
3215       nz = 0;
3216       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3217     }
3218 
3219     /* allocate the matrix space */
3220     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3221     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3222     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3223     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3224     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3225     b->singlemalloc = PETSC_TRUE;
3226     b->i[0] = 0;
3227     for (i=1; i<mbs+1; i++) {
3228       b->i[i] = b->i[i-1] + b->imax[i-1];
3229     }
3230     b->free_a     = PETSC_TRUE;
3231     b->free_ij    = PETSC_TRUE;
3232   } else {
3233     b->free_a     = PETSC_FALSE;
3234     b->free_ij    = PETSC_FALSE;
3235   }
3236 
3237   B->rmap->bs          = bs;
3238   b->bs2              = bs2;
3239   b->mbs              = mbs;
3240   b->nz               = 0;
3241   b->maxnz            = nz;
3242   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3243   PetscFunctionReturn(0);
3244 }
3245 EXTERN_C_END
3246 
3247 EXTERN_C_BEGIN
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3250 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3251 {
3252   PetscInt       i,m,nz,nz_max=0,*nnz;
3253   PetscScalar    *values=0;
3254   PetscErrorCode ierr;
3255 
3256   PetscFunctionBegin;
3257   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3258   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3259   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3260   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3261   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3262   m = B->rmap->n/bs;
3263 
3264   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3265   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3266   for(i=0; i<m; i++) {
3267     nz = ii[i+1]- ii[i];
3268     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3269     nz_max = PetscMax(nz_max, nz);
3270     nnz[i] = nz;
3271   }
3272   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3273   ierr = PetscFree(nnz);CHKERRQ(ierr);
3274 
3275   values = (PetscScalar*)V;
3276   if (!values) {
3277     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3278     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3279   }
3280   for (i=0; i<m; i++) {
3281     PetscInt          ncols  = ii[i+1] - ii[i];
3282     const PetscInt    *icols = jj + ii[i];
3283     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3284     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3285   }
3286   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3287   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289 
3290   PetscFunctionReturn(0);
3291 }
3292 EXTERN_C_END
3293 
3294 
3295 EXTERN_C_BEGIN
3296 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3297 #if defined(PETSC_HAVE_MUMPS)
3298 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3299 #endif
3300 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3301 EXTERN_C_END
3302 
3303 /*MC
3304    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3305    block sparse compressed row format.
3306 
3307    Options Database Keys:
3308 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3309 
3310   Level: beginner
3311 
3312 .seealso: MatCreateSeqBAIJ()
3313 M*/
3314 
3315 
3316 EXTERN_C_BEGIN
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatCreate_SeqBAIJ"
3319 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3320 {
3321   PetscErrorCode ierr;
3322   PetscMPIInt    size;
3323   Mat_SeqBAIJ    *b;
3324 
3325   PetscFunctionBegin;
3326   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3327   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3328 
3329   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3330   B->data = (void*)b;
3331   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3332   b->row                   = 0;
3333   b->col                   = 0;
3334   b->icol                  = 0;
3335   b->reallocs              = 0;
3336   b->saved_values          = 0;
3337 
3338   b->roworiented           = PETSC_TRUE;
3339   b->nonew                 = 0;
3340   b->diag                  = 0;
3341   b->solve_work            = 0;
3342   b->mult_work             = 0;
3343   B->spptr                 = 0;
3344   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3345   b->keepnonzeropattern    = PETSC_FALSE;
3346   b->xtoy                  = 0;
3347   b->XtoY                  = 0;
3348   B->same_nonzero          = PETSC_FALSE;
3349 
3350   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3351                                      "MatGetFactorAvailable_seqbaij_petsc",
3352                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3354                                      "MatGetFactor_seqbaij_petsc",
3355                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3356 #if defined(PETSC_HAVE_MUMPS)
3357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3358 #endif
3359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3360                                      "MatInvertBlockDiagonal_SeqBAIJ",
3361                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3363                                      "MatStoreValues_SeqBAIJ",
3364                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3366                                      "MatRetrieveValues_SeqBAIJ",
3367                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3369                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3370                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3371   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3372                                      "MatConvert_SeqBAIJ_SeqAIJ",
3373                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3375                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3376                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3378                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3379                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3380   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3381                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3382                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3383   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 EXTERN_C_END
3387 
3388 #undef __FUNCT__
3389 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3390 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3391 {
3392   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3393   PetscErrorCode ierr;
3394   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3395 
3396   PetscFunctionBegin;
3397   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3398 
3399   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3400     c->imax = a->imax;
3401     c->ilen = a->ilen;
3402     c->free_imax_ilen = PETSC_FALSE;
3403   } else {
3404     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3405     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3406     for (i=0; i<mbs; i++) {
3407       c->imax[i] = a->imax[i];
3408       c->ilen[i] = a->ilen[i];
3409     }
3410     c->free_imax_ilen = PETSC_TRUE;
3411   }
3412 
3413   /* allocate the matrix space */
3414   if (mallocmatspace){
3415     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3416       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3417       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3418       c->singlemalloc = PETSC_FALSE;
3419       c->free_ij      = PETSC_FALSE;
3420       c->i            = a->i;
3421       c->j            = a->j;
3422       c->parent       = A;
3423       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3424       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3425       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3426     } else {
3427       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3428       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3429       c->singlemalloc = PETSC_TRUE;
3430       c->free_ij      = PETSC_TRUE;
3431       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3432       if (mbs > 0) {
3433 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3434 	if (cpvalues == MAT_COPY_VALUES) {
3435 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3436 	} else {
3437 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3438 	}
3439       }
3440     }
3441   }
3442 
3443   c->roworiented = a->roworiented;
3444   c->nonew       = a->nonew;
3445   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3446   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3447   c->bs2         = a->bs2;
3448   c->mbs         = a->mbs;
3449   c->nbs         = a->nbs;
3450 
3451   if (a->diag) {
3452     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3453       c->diag      = a->diag;
3454       c->free_diag = PETSC_FALSE;
3455     } else {
3456       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3457       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3458       for (i=0; i<mbs; i++) {
3459         c->diag[i] = a->diag[i];
3460       }
3461       c->free_diag = PETSC_TRUE;
3462     }
3463   } else c->diag        = 0;
3464   c->nz                 = a->nz;
3465   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3466   c->solve_work         = 0;
3467   c->mult_work          = 0;
3468   c->free_a             = PETSC_TRUE;
3469   c->free_ij            = PETSC_TRUE;
3470   C->preallocated       = PETSC_TRUE;
3471   C->assembled          = PETSC_TRUE;
3472 
3473   c->compressedrow.use     = a->compressedrow.use;
3474   c->compressedrow.nrows   = a->compressedrow.nrows;
3475   c->compressedrow.check   = a->compressedrow.check;
3476   if (a->compressedrow.use){
3477     i = a->compressedrow.nrows;
3478     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3479     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3480     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3481     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3482   } else {
3483     c->compressedrow.use    = PETSC_FALSE;
3484     c->compressedrow.i      = PETSC_NULL;
3485     c->compressedrow.rindex = PETSC_NULL;
3486   }
3487   C->same_nonzero = A->same_nonzero;
3488   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3489   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 #undef __FUNCT__
3494 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3495 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3496 {
3497     PetscErrorCode ierr;
3498 
3499   PetscFunctionBegin;
3500   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3501   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3502   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3503   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3504   PetscFunctionReturn(0);
3505 }
3506 
3507 #undef __FUNCT__
3508 #define __FUNCT__ "MatLoad_SeqBAIJ"
3509 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3510 {
3511   Mat_SeqBAIJ    *a;
3512   PetscErrorCode ierr;
3513   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3514   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3515   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3516   PetscInt       *masked,nmask,tmp,bs2,ishift;
3517   PetscMPIInt    size;
3518   int            fd;
3519   PetscScalar    *aa;
3520   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3521 
3522   PetscFunctionBegin;
3523   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3524   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3525   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3526   bs2  = bs*bs;
3527 
3528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3529   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3530   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3531   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3532   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3533   M = header[1]; N = header[2]; nz = header[3];
3534 
3535   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3536   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3537 
3538   /*
3539      This code adds extra rows to make sure the number of rows is
3540     divisible by the blocksize
3541   */
3542   mbs        = M/bs;
3543   extra_rows = bs - M + bs*(mbs);
3544   if (extra_rows == bs) extra_rows = 0;
3545   else                  mbs++;
3546   if (extra_rows) {
3547     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3548   }
3549 
3550   /* Set global sizes if not already set */
3551   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3552     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3553   } else { /* Check if the matrix global sizes are correct */
3554     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3555     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3556   }
3557 
3558   /* read in row lengths */
3559   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3560   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3561   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3562 
3563   /* read in column indices */
3564   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3565   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3566   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3567 
3568   /* loop over row lengths determining block row lengths */
3569   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3570   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3571   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3572   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3573   rowcount = 0;
3574   nzcount = 0;
3575   for (i=0; i<mbs; i++) {
3576     nmask = 0;
3577     for (j=0; j<bs; j++) {
3578       kmax = rowlengths[rowcount];
3579       for (k=0; k<kmax; k++) {
3580         tmp = jj[nzcount++]/bs;
3581         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3582       }
3583       rowcount++;
3584     }
3585     browlengths[i] += nmask;
3586     /* zero out the mask elements we set */
3587     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3588   }
3589 
3590   /* Do preallocation  */
3591   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3592   a = (Mat_SeqBAIJ*)newmat->data;
3593 
3594   /* set matrix "i" values */
3595   a->i[0] = 0;
3596   for (i=1; i<= mbs; i++) {
3597     a->i[i]      = a->i[i-1] + browlengths[i-1];
3598     a->ilen[i-1] = browlengths[i-1];
3599   }
3600   a->nz         = 0;
3601   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3602 
3603   /* read in nonzero values */
3604   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3605   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3606   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3607 
3608   /* set "a" and "j" values into matrix */
3609   nzcount = 0; jcount = 0;
3610   for (i=0; i<mbs; i++) {
3611     nzcountb = nzcount;
3612     nmask    = 0;
3613     for (j=0; j<bs; j++) {
3614       kmax = rowlengths[i*bs+j];
3615       for (k=0; k<kmax; k++) {
3616         tmp = jj[nzcount++]/bs;
3617 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3618       }
3619     }
3620     /* sort the masked values */
3621     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3622 
3623     /* set "j" values into matrix */
3624     maskcount = 1;
3625     for (j=0; j<nmask; j++) {
3626       a->j[jcount++]  = masked[j];
3627       mask[masked[j]] = maskcount++;
3628     }
3629     /* set "a" values into matrix */
3630     ishift = bs2*a->i[i];
3631     for (j=0; j<bs; j++) {
3632       kmax = rowlengths[i*bs+j];
3633       for (k=0; k<kmax; k++) {
3634         tmp       = jj[nzcountb]/bs ;
3635         block     = mask[tmp] - 1;
3636         point     = jj[nzcountb] - bs*tmp;
3637         idx       = ishift + bs2*block + j + bs*point;
3638         a->a[idx] = (MatScalar)aa[nzcountb++];
3639       }
3640     }
3641     /* zero out the mask elements we set */
3642     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3643   }
3644   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3645 
3646   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3647   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3648   ierr = PetscFree(aa);CHKERRQ(ierr);
3649   ierr = PetscFree(jj);CHKERRQ(ierr);
3650   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3651 
3652   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3653   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3655   PetscFunctionReturn(0);
3656 }
3657 
3658 #undef __FUNCT__
3659 #define __FUNCT__ "MatCreateSeqBAIJ"
3660 /*@C
3661    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3662    compressed row) format.  For good matrix assembly performance the
3663    user should preallocate the matrix storage by setting the parameter nz
3664    (or the array nnz).  By setting these parameters accurately, performance
3665    during matrix assembly can be increased by more than a factor of 50.
3666 
3667    Collective on MPI_Comm
3668 
3669    Input Parameters:
3670 +  comm - MPI communicator, set to PETSC_COMM_SELF
3671 .  bs - size of block
3672 .  m - number of rows
3673 .  n - number of columns
3674 .  nz - number of nonzero blocks  per block row (same for all rows)
3675 -  nnz - array containing the number of nonzero blocks in the various block rows
3676          (possibly different for each block row) or PETSC_NULL
3677 
3678    Output Parameter:
3679 .  A - the matrix
3680 
3681    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3682    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3683    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3684 
3685    Options Database Keys:
3686 .   -mat_no_unroll - uses code that does not unroll the loops in the
3687                      block calculations (much slower)
3688 .    -mat_block_size - size of the blocks to use
3689 
3690    Level: intermediate
3691 
3692    Notes:
3693    The number of rows and columns must be divisible by blocksize.
3694 
3695    If the nnz parameter is given then the nz parameter is ignored
3696 
3697    A nonzero block is any block that as 1 or more nonzeros in it
3698 
3699    The block AIJ format is fully compatible with standard Fortran 77
3700    storage.  That is, the stored row and column indices can begin at
3701    either one (as in Fortran) or zero.  See the users' manual for details.
3702 
3703    Specify the preallocated storage with either nz or nnz (not both).
3704    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3705    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3706    matrices.
3707 
3708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3709 @*/
3710 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3711 {
3712   PetscErrorCode ierr;
3713 
3714   PetscFunctionBegin;
3715   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3716   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3717   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3718   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3719   PetscFunctionReturn(0);
3720 }
3721 
3722 #undef __FUNCT__
3723 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3724 /*@C
3725    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3726    per row in the matrix. For good matrix assembly performance the
3727    user should preallocate the matrix storage by setting the parameter nz
3728    (or the array nnz).  By setting these parameters accurately, performance
3729    during matrix assembly can be increased by more than a factor of 50.
3730 
3731    Collective on MPI_Comm
3732 
3733    Input Parameters:
3734 +  A - the matrix
3735 .  bs - size of block
3736 .  nz - number of block nonzeros per block row (same for all rows)
3737 -  nnz - array containing the number of block nonzeros in the various block rows
3738          (possibly different for each block row) or PETSC_NULL
3739 
3740    Options Database Keys:
3741 .   -mat_no_unroll - uses code that does not unroll the loops in the
3742                      block calculations (much slower)
3743 .    -mat_block_size - size of the blocks to use
3744 
3745    Level: intermediate
3746 
3747    Notes:
3748    If the nnz parameter is given then the nz parameter is ignored
3749 
3750    You can call MatGetInfo() to get information on how effective the preallocation was;
3751    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3752    You can also run with the option -info and look for messages with the string
3753    malloc in them to see if additional memory allocation was needed.
3754 
3755    The block AIJ format is fully compatible with standard Fortran 77
3756    storage.  That is, the stored row and column indices can begin at
3757    either one (as in Fortran) or zero.  See the users' manual for details.
3758 
3759    Specify the preallocated storage with either nz or nnz (not both).
3760    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3761    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3762 
3763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3764 @*/
3765 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3766 {
3767   PetscErrorCode ierr;
3768 
3769   PetscFunctionBegin;
3770   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3771   PetscFunctionReturn(0);
3772 }
3773 
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3776 /*@C
3777    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3778    (the default sequential PETSc format).
3779 
3780    Collective on MPI_Comm
3781 
3782    Input Parameters:
3783 +  A - the matrix
3784 .  i - the indices into j for the start of each local row (starts with zero)
3785 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3786 -  v - optional values in the matrix
3787 
3788    Level: developer
3789 
3790 .keywords: matrix, aij, compressed row, sparse
3791 
3792 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3793 @*/
3794 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3795 {
3796   PetscErrorCode ierr;
3797 
3798   PetscFunctionBegin;
3799   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3800   PetscFunctionReturn(0);
3801 }
3802 
3803 
3804 #undef __FUNCT__
3805 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3806 /*@
3807      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3808 
3809      Collective on MPI_Comm
3810 
3811    Input Parameters:
3812 +  comm - must be an MPI communicator of size 1
3813 .  bs - size of block
3814 .  m - number of rows
3815 .  n - number of columns
3816 .  i - row indices
3817 .  j - column indices
3818 -  a - matrix values
3819 
3820    Output Parameter:
3821 .  mat - the matrix
3822 
3823    Level: advanced
3824 
3825    Notes:
3826        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3827     once the matrix is destroyed
3828 
3829        You cannot set new nonzero locations into this matrix, that will generate an error.
3830 
3831        The i and j indices are 0 based
3832 
3833        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3834 
3835 
3836 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3837 
3838 @*/
3839 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3840 {
3841   PetscErrorCode ierr;
3842   PetscInt       ii;
3843   Mat_SeqBAIJ    *baij;
3844 
3845   PetscFunctionBegin;
3846   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3847   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3848 
3849   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3850   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3851   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3852   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3853   baij = (Mat_SeqBAIJ*)(*mat)->data;
3854   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3855   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3856 
3857   baij->i = i;
3858   baij->j = j;
3859   baij->a = a;
3860   baij->singlemalloc = PETSC_FALSE;
3861   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3862   baij->free_a       = PETSC_FALSE;
3863   baij->free_ij       = PETSC_FALSE;
3864 
3865   for (ii=0; ii<m; ii++) {
3866     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3867 #if defined(PETSC_USE_DEBUG)
3868     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3869 #endif
3870   }
3871 #if defined(PETSC_USE_DEBUG)
3872   for (ii=0; ii<baij->i[m]; ii++) {
3873     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3874     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3875   }
3876 #endif
3877 
3878   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3880   PetscFunctionReturn(0);
3881 }
3882