xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5d4804778dfa45ea00bcad8aabdb3eca36eaeac3)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
28   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
31   PetscFunctionReturn(0);
32 }
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
37 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
40   PetscErrorCode ierr;
41   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
42   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
43   PetscReal      shift = 0.0;
44 
45   PetscFunctionBegin;
46   if (a->idiagvalid) PetscFunctionReturn(0);
47   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
48   diag_offset = a->diag;
49   if (!a->idiag) {
50     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
51     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
52   }
53   diag  = a->idiag;
54   mdiag = a->idiag+bs2*mbs;
55   /* factor and invert each block */
56   switch (bs){
57     case 1:
58       for (i=0; i<mbs; i++) {
59         odiag = v + 1*diag_offset[i];
60         diag[0]  = odiag[0];
61         mdiag[0] = odiag[0];
62         diag[0]  = 1.0 / (diag[0] + shift);
63         diag    += 1;
64         mdiag   += 1;
65       }
66       break;
67     case 2:
68       for (i=0; i<mbs; i++) {
69         odiag   = v + 4*diag_offset[i];
70         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
71 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
72 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
73 	diag    += 4;
74 	mdiag   += 4;
75       }
76       break;
77     case 3:
78       for (i=0; i<mbs; i++) {
79         odiag    = v + 9*diag_offset[i];
80         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82         diag[8]  = odiag[8];
83         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85         mdiag[8] = odiag[8];
86 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
87 	diag    += 9;
88 	mdiag   += 9;
89       }
90       break;
91     case 4:
92       for (i=0; i<mbs; i++) {
93         odiag  = v + 16*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
97 	diag  += 16;
98 	mdiag += 16;
99       }
100       break;
101     case 5:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 25*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
107 	diag  += 25;
108 	mdiag += 25;
109       }
110       break;
111     case 6:
112       for (i=0; i<mbs; i++) {
113         odiag = v + 36*diag_offset[i];
114         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
117 	diag  += 36;
118 	mdiag += 36;
119       }
120       break;
121     case 7:
122       for (i=0; i<mbs; i++) {
123         odiag = v + 49*diag_offset[i];
124         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
127 	diag  += 49;
128 	mdiag += 49;
129       }
130       break;
131     default:
132       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
133       for (i=0; i<mbs; i++) {
134         odiag = v + bs2*diag_offset[i];
135         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
136         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
138 	diag  += bs2;
139 	mdiag += bs2;
140       }
141       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
142   }
143   a->idiagvalid = PETSC_TRUE;
144   PetscFunctionReturn(0);
145 }
146 EXTERN_C_END
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
151 {
152   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
153   PetscScalar        *x,x1,s1;
154   const PetscScalar  *b;
155   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
156   PetscErrorCode     ierr;
157   PetscInt           m = a->mbs,i,i2,nz,j;
158   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   its = its*lits;
163   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
164   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
165   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
166   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
167   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
168 
169   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
170 
171   diag  = a->diag;
172   idiag = a->idiag;
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
178       x[0] = b[0]*idiag[0];
179       i2     = 1;
180       idiag += 1;
181       for (i=1; i<m; i++) {
182         v     = aa + ai[i];
183         vi    = aj + ai[i];
184         nz    = diag[i] - ai[i];
185         s1    = b[i2];
186         for (j=0; j<nz; j++) {
187           s1 -= v[j]*x[vi[j]];
188         }
189         x[i2]   = idiag[0]*s1;
190         idiag   += 1;
191         i2      += 1;
192       }
193       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
194       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
195     }
196     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
197         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
198       i2    = 0;
199       mdiag = a->idiag+a->mbs;
200       for (i=0; i<m; i++) {
201         x1      = x[i2];
202         x[i2]   = mdiag[0]*x1;
203         mdiag  += 1;
204         i2     += 1;
205       }
206       ierr = PetscLogFlops(m);CHKERRQ(ierr);
207     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
208       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
209     }
210     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
211       idiag   = a->idiag+a->mbs - 1;
212       i2      = m - 1;
213       x1      = x[i2];
214       x[i2]   = idiag[0]*x1;
215       idiag -= 1;
216       i2    -= 1;
217       for (i=m-2; i>=0; i--) {
218         v     = aa + (diag[i]+1);
219         vi    = aj + diag[i] + 1;
220         nz    = ai[i+1] - diag[i] - 1;
221         s1    = x[i2];
222         for (j=0; j<nz; j++) {
223           s1 -= v[j]*x[vi[j]];
224         }
225         x[i2]   = idiag[0]*s1;
226         idiag   -= 1;
227         i2      -= 1;
228       }
229       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
230     }
231   } else {
232     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
233   }
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
242 {
243   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
244   PetscScalar        *x,x1,x2,s1,s2;
245   const PetscScalar  *b;
246   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
247   PetscErrorCode     ierr;
248   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
249   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
250 
251   PetscFunctionBegin;
252   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
253   its = its*lits;
254   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
255   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
256   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
257   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
258   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
259 
260   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
261 
262   diag  = a->diag;
263   idiag = a->idiag;
264   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
265   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
266 
267   if (flag & SOR_ZERO_INITIAL_GUESS) {
268     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
269       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
270       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
271       i2     = 2;
272       idiag += 4;
273       for (i=1; i<m; i++) {
274 	v     = aa + 4*ai[i];
275 	vi    = aj + ai[i];
276 	nz    = diag[i] - ai[i];
277 	s1    = b[i2]; s2 = b[i2+1];
278         for (j=0; j<nz; j++) {
279 	  idx  = 2*vi[j];
280           it   = 4*j;
281 	  x1   = x[idx]; x2 = x[1+idx];
282 	  s1  -= v[it]*x1 + v[it+2]*x2;
283 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
284 	}
285 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
286 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
287         idiag   += 4;
288         i2      += 2;
289       }
290       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
291       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
292     }
293     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
294         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
295       i2    = 0;
296       mdiag = a->idiag+4*a->mbs;
297       for (i=0; i<m; i++) {
298         x1      = x[i2]; x2 = x[i2+1];
299         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
300         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
301         mdiag  += 4;
302         i2     += 2;
303       }
304       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
305     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
306       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
307     }
308     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
309       idiag   = a->idiag+4*a->mbs - 4;
310       i2      = 2*m - 2;
311       x1      = x[i2]; x2 = x[i2+1];
312       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
313       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
314       idiag -= 4;
315       i2    -= 2;
316       for (i=m-2; i>=0; i--) {
317 	v     = aa + 4*(diag[i]+1);
318 	vi    = aj + diag[i] + 1;
319 	nz    = ai[i+1] - diag[i] - 1;
320 	s1    = x[i2]; s2 = x[i2+1];
321         for (j=0; j<nz; j++) {
322  	  idx  = 2*vi[j];
323           it   = 4*j;
324 	  x1   = x[idx]; x2 = x[1+idx];
325 	  s1  -= v[it]*x1 + v[it+2]*x2;
326 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
327 	}
328 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
329 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
330         idiag   -= 4;
331         i2      -= 2;
332       }
333       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
334     }
335   } else {
336     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
337   }
338   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
346 {
347   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
348   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
349   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
350   const PetscScalar  *b;
351   PetscErrorCode     ierr;
352   PetscInt           m = a->mbs,i,i2,nz,idx;
353   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
354 
355   PetscFunctionBegin;
356   its = its*lits;
357   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
358   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
359   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
360   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
361   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
362   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
363 
364   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
365 
366   diag  = a->diag;
367   idiag = a->idiag;
368   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
369   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
370 
371   if (flag & SOR_ZERO_INITIAL_GUESS) {
372     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
373       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
374       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
375       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
376       i2     = 3;
377       idiag += 9;
378       for (i=1; i<m; i++) {
379         v     = aa + 9*ai[i];
380         vi    = aj + ai[i];
381         nz    = diag[i] - ai[i];
382         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
383         while (nz--) {
384           idx  = 3*(*vi++);
385           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
386           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
387           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
388           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
389           v   += 9;
390         }
391         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
392         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
393         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
394         idiag   += 9;
395         i2      += 3;
396       }
397       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
398       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
399     }
400     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
401         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
402       i2    = 0;
403       mdiag = a->idiag+9*a->mbs;
404       for (i=0; i<m; i++) {
405         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
406         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
407         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
408         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
409         mdiag  += 9;
410         i2     += 3;
411       }
412       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
413     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
414       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
415     }
416     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
417       idiag   = a->idiag+9*a->mbs - 9;
418       i2      = 3*m - 3;
419       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
420       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
421       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
422       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
423       idiag -= 9;
424       i2    -= 3;
425       for (i=m-2; i>=0; i--) {
426         v     = aa + 9*(diag[i]+1);
427         vi    = aj + diag[i] + 1;
428         nz    = ai[i+1] - diag[i] - 1;
429         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
430         while (nz--) {
431           idx  = 3*(*vi++);
432           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
433           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
434           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
435           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
436           v   += 9;
437         }
438         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
439         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
440         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
441         idiag   -= 9;
442         i2      -= 3;
443       }
444       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
445     }
446   } else {
447     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
448   }
449   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
457 {
458   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
459   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
460   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
461   const PetscScalar  *b;
462   PetscErrorCode     ierr;
463   PetscInt           m = a->mbs,i,i2,nz,idx;
464   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
465 
466   PetscFunctionBegin;
467   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
468   its = its*lits;
469   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
470   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
471   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
472   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
473   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
474 
475   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
476 
477   diag  = a->diag;
478   idiag = a->idiag;
479   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
480   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
481 
482   if (flag & SOR_ZERO_INITIAL_GUESS) {
483     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
484       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
485       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
486       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
487       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
488       i2     = 4;
489       idiag += 16;
490       for (i=1; i<m; i++) {
491 	v     = aa + 16*ai[i];
492 	vi    = aj + ai[i];
493 	nz    = diag[i] - ai[i];
494 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
495 	while (nz--) {
496 	  idx  = 4*(*vi++);
497 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
498 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
499 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
500 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
501 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
502 	  v   += 16;
503 	}
504 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
505 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
506 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
507 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
508         idiag   += 16;
509         i2      += 4;
510       }
511       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
512       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
513     }
514     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
515         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
516       i2    = 0;
517       mdiag = a->idiag+16*a->mbs;
518       for (i=0; i<m; i++) {
519         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
520         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
521         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
522         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
523         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
524         mdiag  += 16;
525         i2     += 4;
526       }
527       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
528     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
529       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
530     }
531     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
532       idiag   = a->idiag+16*a->mbs - 16;
533       i2      = 4*m - 4;
534       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
535       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
536       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
537       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
538       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
539       idiag -= 16;
540       i2    -= 4;
541       for (i=m-2; i>=0; i--) {
542 	v     = aa + 16*(diag[i]+1);
543 	vi    = aj + diag[i] + 1;
544 	nz    = ai[i+1] - diag[i] - 1;
545 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
546 	while (nz--) {
547 	  idx  = 4*(*vi++);
548 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
549 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
550 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
551 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
552 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
553 	  v   += 16;
554 	}
555 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
556 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
557 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
558 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
559         idiag   -= 16;
560         i2      -= 4;
561       }
562       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
563     }
564   } else {
565     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
566   }
567   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
575 {
576   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
577   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
578   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
579   const PetscScalar  *b;
580   PetscErrorCode     ierr;
581   PetscInt           m = a->mbs,i,i2,nz,idx;
582   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
583 
584   PetscFunctionBegin;
585   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
586   its = its*lits;
587   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
588   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
589   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
590   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
591   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
592 
593   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
594 
595   diag  = a->diag;
596   idiag = a->idiag;
597   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
598   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
599 
600   if (flag & SOR_ZERO_INITIAL_GUESS) {
601     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
602       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
603       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
604       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
605       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
606       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
607       i2     = 5;
608       idiag += 25;
609       for (i=1; i<m; i++) {
610 	v     = aa + 25*ai[i];
611 	vi    = aj + ai[i];
612 	nz    = diag[i] - ai[i];
613 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
614 	while (nz--) {
615 	  idx  = 5*(*vi++);
616 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
617 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
618 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
619 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
620 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
621 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
622 	  v   += 25;
623 	}
624 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
625 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
626 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
627 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
628 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
629         idiag   += 25;
630         i2      += 5;
631       }
632       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
633       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
634     }
635     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
636         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
637       i2    = 0;
638       mdiag = a->idiag+25*a->mbs;
639       for (i=0; i<m; i++) {
640         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
641         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
642         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
643         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
644         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
645         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
646         mdiag  += 25;
647         i2     += 5;
648       }
649       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
650     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
651       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
652     }
653     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
654       idiag   = a->idiag+25*a->mbs - 25;
655       i2      = 5*m - 5;
656       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
657       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
658       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
659       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
660       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
661       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
662       idiag -= 25;
663       i2    -= 5;
664       for (i=m-2; i>=0; i--) {
665 	v     = aa + 25*(diag[i]+1);
666 	vi    = aj + diag[i] + 1;
667 	nz    = ai[i+1] - diag[i] - 1;
668 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
669 	while (nz--) {
670 	  idx  = 5*(*vi++);
671 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
672 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
673 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
674 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
675 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
676 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
677 	  v   += 25;
678 	}
679 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
680 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
681 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
682 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
683 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
684         idiag   -= 25;
685         i2      -= 5;
686       }
687       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
688     }
689   } else {
690     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
691   }
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
700 {
701   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
702   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
703   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
704   const PetscScalar  *b;
705   PetscErrorCode     ierr;
706   PetscInt           m = a->mbs,i,i2,nz,idx;
707   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
708 
709   PetscFunctionBegin;
710   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
711   its = its*lits;
712   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
713   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
714   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
715   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
716   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
717 
718   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
719 
720   diag  = a->diag;
721   idiag = a->idiag;
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
724 
725   if (flag & SOR_ZERO_INITIAL_GUESS) {
726     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
727       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
728       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
729       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
730       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
731       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
732       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
733       i2     = 6;
734       idiag += 36;
735       for (i=1; i<m; i++) {
736         v     = aa + 36*ai[i];
737         vi    = aj + ai[i];
738         nz    = diag[i] - ai[i];
739         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
740         while (nz--) {
741           idx  = 6*(*vi++);
742           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
743           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
744           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
745           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
746           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
747           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
748           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
749           v   += 36;
750         }
751         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
752         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
753         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
754         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
755         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
756         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
757         idiag   += 36;
758         i2      += 6;
759       }
760       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
761       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
762     }
763     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
764         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
765       i2    = 0;
766       mdiag = a->idiag+36*a->mbs;
767       for (i=0; i<m; i++) {
768         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
769         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
770         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
771         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
772         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
773         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
774         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
775         mdiag  += 36;
776         i2     += 6;
777       }
778       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
779     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
780       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
781     }
782     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
783       idiag   = a->idiag+36*a->mbs - 36;
784       i2      = 6*m - 6;
785       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
786       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
787       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
788       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
789       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
790       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
791       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
792       idiag -= 36;
793       i2    -= 6;
794       for (i=m-2; i>=0; i--) {
795         v     = aa + 36*(diag[i]+1);
796         vi    = aj + diag[i] + 1;
797         nz    = ai[i+1] - diag[i] - 1;
798         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
799         while (nz--) {
800           idx  = 6*(*vi++);
801           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
802           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
803           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
804           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
805           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
806           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
807           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
808           v   += 36;
809         }
810         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
811         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
812         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
813         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
814         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
815         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
816         idiag   -= 36;
817         i2      -= 6;
818       }
819       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
820     }
821   } else {
822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
823   }
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
832 {
833   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
834   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
835   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
836   const PetscScalar  *b;
837   PetscErrorCode     ierr;
838   PetscInt           m = a->mbs,i,i2,nz,idx;
839   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
840 
841   PetscFunctionBegin;
842   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
843   its = its*lits;
844   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
845   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
846   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
847   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
848   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
849 
850   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
851 
852   diag  = a->diag;
853   idiag = a->idiag;
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
856 
857   if (flag & SOR_ZERO_INITIAL_GUESS) {
858     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
859       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
860       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
861       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
862       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
863       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
864       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
865       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
866       i2     = 7;
867       idiag += 49;
868       for (i=1; i<m; i++) {
869         v     = aa + 49*ai[i];
870         vi    = aj + ai[i];
871         nz    = diag[i] - ai[i];
872         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
873         while (nz--) {
874           idx  = 7*(*vi++);
875           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
876           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
877           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
878           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
879           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
880           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
881           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
882           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
883           v   += 49;
884         }
885         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
886         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
887         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
888         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
889         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
890         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
891         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
892         idiag   += 49;
893         i2      += 7;
894       }
895       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
896       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
897     }
898     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
899         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
900       i2    = 0;
901       mdiag = a->idiag+49*a->mbs;
902       for (i=0; i<m; i++) {
903         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
904         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
905         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
906         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
907         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
908         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
909         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
910         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
911         mdiag  += 49;
912         i2     += 7;
913       }
914       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
915     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
916       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
917     }
918     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
919       idiag   = a->idiag+49*a->mbs - 49;
920       i2      = 7*m - 7;
921       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
922       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
923       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
924       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
925       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
926       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
927       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
928       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
929       idiag -= 49;
930       i2    -= 7;
931       for (i=m-2; i>=0; i--) {
932         v     = aa + 49*(diag[i]+1);
933         vi    = aj + diag[i] + 1;
934         nz    = ai[i+1] - diag[i] - 1;
935         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
936         while (nz--) {
937           idx  = 7*(*vi++);
938           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
939           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
940           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
941           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
942           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
943           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
944           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
945           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
946           v   += 49;
947         }
948         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
949         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
950         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
951         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
952         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
953         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
954         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
955         idiag   -= 49;
956         i2      -= 7;
957       }
958       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
959     }
960   } else {
961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
962   }
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
971 {
972   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
973   PetscScalar        *x,*work,*w,*workt;
974   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
975   const PetscScalar  *b;
976   PetscErrorCode     ierr;
977   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
978   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
979 
980   PetscFunctionBegin;
981   its = its*lits;
982   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
983   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
984   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
985   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
986   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
987   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
988 
989   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
990 
991   diag  = a->diag;
992   idiag = a->idiag;
993   if (!a->mult_work) {
994     k    = PetscMax(A->rmap->n,A->cmap->n);
995     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
996   }
997   work = a->mult_work;
998   if (!a->sor_work) {
999     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1000   }
1001   w = a->sor_work;
1002 
1003   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1004   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1005 
1006   if (flag & SOR_ZERO_INITIAL_GUESS) {
1007     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1008       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1009       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1010       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1011       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1012       i2     = bs;
1013       idiag += bs2;
1014       for (i=1; i<m; i++) {
1015         v     = aa + bs2*ai[i];
1016         vi    = aj + ai[i];
1017         nz    = diag[i] - ai[i];
1018 
1019         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1020 	/* copy all rows of x that are needed into contiguous space */
1021         workt = work;
1022         for (j=0; j<nz; j++) {
1023           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1024           workt += bs;
1025         }
1026         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1027        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1028         while (nz--) {
1029           idx  = N*(*vi++);
1030           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1031           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1032           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1033           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1034           v   += N2;
1035 	  } */
1036 
1037         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1038         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1039         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1040         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1041 
1042         idiag   += bs2;
1043         i2      += bs;
1044       }
1045       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1046       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1047     }
1048     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1049         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1050       i2    = 0;
1051       mdiag = a->idiag+bs2*a->mbs;
1052       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053       for (i=0; i<m; i++) {
1054         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1055         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1056         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1057         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1058         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1059 
1060         mdiag  += bs2;
1061         i2     += bs;
1062       }
1063       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1064     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1065       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1066     }
1067     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1068       idiag   = a->idiag+bs2*a->mbs - bs2;
1069       i2      = bs*m - bs;
1070       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1071       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1072       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1073       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1074       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1075       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1076       idiag -= bs2;
1077       i2    -= bs;
1078       for (i=m-2; i>=0; i--) {
1079         v     = aa + bs2*(diag[i]+1);
1080         vi    = aj + diag[i] + 1;
1081         nz    = ai[i+1] - diag[i] - 1;
1082 
1083         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1084 	/* copy all rows of x that are needed into contiguous space */
1085         workt = work;
1086         for (j=0; j<nz; j++) {
1087           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1088           workt += bs;
1089         }
1090         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1091         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1092         while (nz--) {
1093           idx  = N*(*vi++);
1094           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1095           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1096           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1097           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1098           v   += N2;
1099 	  } */
1100 
1101         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1102         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1103         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1104         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1105         idiag   -= bs2;
1106         i2      -= bs;
1107       }
1108       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1109     }
1110   } else {
1111     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1112   }
1113   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1114   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1120 */
1121 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1124 #define matsetvaluesblocked4_ matsetvaluesblocked4
1125 #endif
1126 
1127 EXTERN_C_BEGIN
1128 #undef __FUNCT__
1129 #define __FUNCT__ "matsetvaluesblocked4_"
1130 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1131 {
1132   Mat               A = *AA;
1133   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1134   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1135   PetscInt          *ai=a->i,*ailen=a->ilen;
1136   PetscInt          *aj=a->j,stepval,lastcol = -1;
1137   const PetscScalar *value = v;
1138   MatScalar         *ap,*aa = a->a,*bap;
1139 
1140   PetscFunctionBegin;
1141   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1142   stepval = (n-1)*4;
1143   for (k=0; k<m; k++) { /* loop over added rows */
1144     row  = im[k];
1145     rp   = aj + ai[row];
1146     ap   = aa + 16*ai[row];
1147     nrow = ailen[row];
1148     low  = 0;
1149     high = nrow;
1150     for (l=0; l<n; l++) { /* loop over added columns */
1151       col = in[l];
1152       if (col <= lastcol) low = 0; else high = nrow;
1153       lastcol = col;
1154       value = v + k*(stepval+4 + l)*4;
1155       while (high-low > 7) {
1156         t = (low+high)/2;
1157         if (rp[t] > col) high = t;
1158         else             low  = t;
1159       }
1160       for (i=low; i<high; i++) {
1161         if (rp[i] > col) break;
1162         if (rp[i] == col) {
1163           bap  = ap +  16*i;
1164           for (ii=0; ii<4; ii++,value+=stepval) {
1165             for (jj=ii; jj<16; jj+=4) {
1166               bap[jj] += *value++;
1167             }
1168           }
1169           goto noinsert2;
1170         }
1171       }
1172       N = nrow++ - 1;
1173       high++; /* added new column index thus must search to one higher than before */
1174       /* shift up all the later entries in this row */
1175       for (ii=N; ii>=i; ii--) {
1176         rp[ii+1] = rp[ii];
1177         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1178       }
1179       if (N >= i) {
1180         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1181       }
1182       rp[i] = col;
1183       bap   = ap +  16*i;
1184       for (ii=0; ii<4; ii++,value+=stepval) {
1185         for (jj=ii; jj<16; jj+=4) {
1186           bap[jj] = *value++;
1187         }
1188       }
1189       noinsert2:;
1190       low = i;
1191     }
1192     ailen[row] = nrow;
1193   }
1194   PetscFunctionReturnVoid();
1195 }
1196 EXTERN_C_END
1197 
1198 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1199 #define matsetvalues4_ MATSETVALUES4
1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1201 #define matsetvalues4_ matsetvalues4
1202 #endif
1203 
1204 EXTERN_C_BEGIN
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSetValues4_"
1207 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1208 {
1209   Mat         A = *AA;
1210   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1211   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1212   PetscInt    *ai=a->i,*ailen=a->ilen;
1213   PetscInt    *aj=a->j,brow,bcol;
1214   PetscInt    ridx,cidx,lastcol = -1;
1215   MatScalar   *ap,value,*aa=a->a,*bap;
1216 
1217   PetscFunctionBegin;
1218   for (k=0; k<m; k++) { /* loop over added rows */
1219     row  = im[k]; brow = row/4;
1220     rp   = aj + ai[brow];
1221     ap   = aa + 16*ai[brow];
1222     nrow = ailen[brow];
1223     low  = 0;
1224     high = nrow;
1225     for (l=0; l<n; l++) { /* loop over added columns */
1226       col = in[l]; bcol = col/4;
1227       ridx = row % 4; cidx = col % 4;
1228       value = v[l + k*n];
1229       if (col <= lastcol) low = 0; else high = nrow;
1230       lastcol = col;
1231       while (high-low > 7) {
1232         t = (low+high)/2;
1233         if (rp[t] > bcol) high = t;
1234         else              low  = t;
1235       }
1236       for (i=low; i<high; i++) {
1237         if (rp[i] > bcol) break;
1238         if (rp[i] == bcol) {
1239           bap  = ap +  16*i + 4*cidx + ridx;
1240           *bap += value;
1241           goto noinsert1;
1242         }
1243       }
1244       N = nrow++ - 1;
1245       high++; /* added new column thus must search to one higher than before */
1246       /* shift up all the later entries in this row */
1247       for (ii=N; ii>=i; ii--) {
1248         rp[ii+1] = rp[ii];
1249         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1250       }
1251       if (N>=i) {
1252         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1253       }
1254       rp[i]                    = bcol;
1255       ap[16*i + 4*cidx + ridx] = value;
1256       noinsert1:;
1257       low = i;
1258     }
1259     ailen[brow] = nrow;
1260   }
1261   PetscFunctionReturnVoid();
1262 }
1263 EXTERN_C_END
1264 
1265 /*
1266      Checks for missing diagonals
1267 */
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1271 {
1272   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1273   PetscErrorCode ierr;
1274   PetscInt       *diag,*jj = a->j,i;
1275 
1276   PetscFunctionBegin;
1277   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1278   *missing = PETSC_FALSE;
1279   if (A->rmap->n > 0 && !jj) {
1280     *missing  = PETSC_TRUE;
1281     if (d) *d = 0;
1282     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1283   } else {
1284     diag     = a->diag;
1285     for (i=0; i<a->mbs; i++) {
1286       if (jj[diag[i]] != i) {
1287         *missing  = PETSC_TRUE;
1288         if (d) *d = i;
1289         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1290       }
1291     }
1292   }
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1299 {
1300   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302   PetscInt       i,j,m = a->mbs;
1303 
1304   PetscFunctionBegin;
1305   if (!a->diag) {
1306     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1307     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1308     a->free_diag = PETSC_TRUE;
1309   }
1310   for (i=0; i<m; i++) {
1311     a->diag[i] = a->i[i+1];
1312     for (j=a->i[i]; j<a->i[i+1]; j++) {
1313       if (a->j[j] == i) {
1314         a->diag[i] = j;
1315         break;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 
1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1332   PetscInt       *tia, *tja;
1333 
1334   PetscFunctionBegin;
1335   *nn = n;
1336   if (!ia) PetscFunctionReturn(0);
1337   if (symmetric) {
1338     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1339     nz   = tia[n];
1340   } else {
1341     tia = a->i; tja = a->j;
1342   }
1343 
1344   if (!blockcompressed && bs > 1) {
1345     (*nn) *= bs;
1346     /* malloc & create the natural set of indices */
1347     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1348     if (n) {
1349       (*ia)[0] = 0;
1350       for (j=1; j<bs; j++) {
1351         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1352       }
1353     }
1354 
1355     for (i=1; i<n; i++) {
1356       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1357       for (j=1; j<bs; j++) {
1358         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1359       }
1360     }
1361     if (n) {
1362       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1363     }
1364 
1365     if (ja) {
1366       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1367       cnt = 0;
1368       for (i=0; i<n; i++) {
1369         for (j=0; j<bs; j++) {
1370           for (k=tia[i]; k<tia[i+1]; k++) {
1371             for (l=0; l<bs; l++) {
1372               (*ja)[cnt++] = bs*tja[k] + l;
1373 	    }
1374           }
1375         }
1376       }
1377     }
1378 
1379     n     *= bs;
1380     nz *= bs*bs;
1381     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1382       ierr = PetscFree(tia);CHKERRQ(ierr);
1383       ierr = PetscFree(tja);CHKERRQ(ierr);
1384     }
1385   } else if (oshift == 1) {
1386     if (symmetric) {
1387       PetscInt nz = tia[A->rmap->n/bs];
1388       /*  add 1 to i and j indices */
1389       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1390       *ia = tia;
1391       if (ja) {
1392 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1393         *ja = tja;
1394       }
1395     } else {
1396       PetscInt nz = a->i[A->rmap->n/bs];
1397       /* malloc space and  add 1 to i and j indices */
1398       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1399       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1400       if (ja) {
1401 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1402 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1403       }
1404     }
1405   } else {
1406     *ia = tia;
1407     if (ja) *ja = tja;
1408   }
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1416 {
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!ia) PetscFunctionReturn(0);
1421   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1422     ierr = PetscFree(*ia);CHKERRQ(ierr);
1423     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436 #if defined(PETSC_USE_LOG)
1437   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1438 #endif
1439   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1440   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1441   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1442   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1443   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1444   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1445   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1446   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1447   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1448   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1449   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1450   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1451   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1452 
1453   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1454   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1455   ierr = PetscFree(A->data);CHKERRQ(ierr);
1456 
1457   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1458   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1459   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1460   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1461   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1462   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1466   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1467   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1473 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   switch (op) {
1480   case MAT_ROW_ORIENTED:
1481     a->roworiented    = flg;
1482     break;
1483   case MAT_KEEP_NONZERO_PATTERN:
1484     a->keepnonzeropattern = flg;
1485     break;
1486   case MAT_NEW_NONZERO_LOCATIONS:
1487     a->nonew          = (flg ? 0 : 1);
1488     break;
1489   case MAT_NEW_NONZERO_LOCATION_ERR:
1490     a->nonew          = (flg ? -1 : 0);
1491     break;
1492   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1493     a->nonew          = (flg ? -2 : 0);
1494     break;
1495   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1496     a->nounused       = (flg ? -1 : 0);
1497     break;
1498   case MAT_CHECK_COMPRESSED_ROW:
1499     a->compressedrow.check = flg;
1500     break;
1501   case MAT_NEW_DIAGONALS:
1502   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503   case MAT_USE_HASH_TABLE:
1504     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1505     break;
1506   case MAT_SYMMETRIC:
1507   case MAT_STRUCTURALLY_SYMMETRIC:
1508   case MAT_HERMITIAN:
1509   case MAT_SYMMETRY_ETERNAL:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   default:
1513     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1523   PetscErrorCode ierr;
1524   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1525   MatScalar      *aa,*aa_i;
1526   PetscScalar    *v_i;
1527 
1528   PetscFunctionBegin;
1529   bs  = A->rmap->bs;
1530   ai  = a->i;
1531   aj  = a->j;
1532   aa  = a->a;
1533   bs2 = a->bs2;
1534 
1535   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1536 
1537   bn  = row/bs;   /* Block number */
1538   bp  = row % bs; /* Block Position */
1539   M   = ai[bn+1] - ai[bn];
1540   *nz = bs*M;
1541 
1542   if (v) {
1543     *v = 0;
1544     if (*nz) {
1545       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1546       for (i=0; i<M; i++) { /* for each block in the block row */
1547         v_i  = *v + i*bs;
1548         aa_i = aa + bs2*(ai[bn] + i);
1549         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1550       }
1551     }
1552   }
1553 
1554   if (idx) {
1555     *idx = 0;
1556     if (*nz) {
1557       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1558       for (i=0; i<M; i++) { /* for each block in the block row */
1559         idx_i = *idx + i*bs;
1560         itmp  = bs*aj[ai[bn] + i];
1561         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1562       }
1563     }
1564   }
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1571 {
1572   PetscErrorCode ierr;
1573 
1574   PetscFunctionBegin;
1575   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1576   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1585 {
1586   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1587   Mat            C;
1588   PetscErrorCode ierr;
1589   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1590   PetscInt       *rows,*cols,bs2=a->bs2;
1591   MatScalar      *array;
1592 
1593   PetscFunctionBegin;
1594   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1595   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1596     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1597     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1598 
1599     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1600     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1601     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1602     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1603     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1604     ierr = PetscFree(col);CHKERRQ(ierr);
1605   } else {
1606     C = *B;
1607   }
1608 
1609   array = a->a;
1610   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1611   for (i=0; i<mbs; i++) {
1612     cols[0] = i*bs;
1613     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1614     len = ai[i+1] - ai[i];
1615     for (j=0; j<len; j++) {
1616       rows[0] = (*aj++)*bs;
1617       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1618       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1619       array += bs2;
1620     }
1621   }
1622   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1623 
1624   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1625   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1626 
1627   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1628     *B = C;
1629   } else {
1630     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 EXTERN_C_BEGIN
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1638 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1639 {
1640   PetscErrorCode ierr;
1641   Mat            Btrans;
1642 
1643   PetscFunctionBegin;
1644   *f = PETSC_FALSE;
1645   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1646   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1647   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1648   PetscFunctionReturn(0);
1649 }
1650 EXTERN_C_END
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1654 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1655 {
1656   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1657   PetscErrorCode ierr;
1658   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1659   int            fd;
1660   PetscScalar    *aa;
1661   FILE           *file;
1662 
1663   PetscFunctionBegin;
1664   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1665   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1666   col_lens[0] = MAT_FILE_CLASSID;
1667 
1668   col_lens[1] = A->rmap->N;
1669   col_lens[2] = A->cmap->n;
1670   col_lens[3] = a->nz*bs2;
1671 
1672   /* store lengths of each row and write (including header) to file */
1673   count = 0;
1674   for (i=0; i<a->mbs; i++) {
1675     for (j=0; j<bs; j++) {
1676       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1677     }
1678   }
1679   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1680   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1681 
1682   /* store column indices (zero start index) */
1683   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1684   count = 0;
1685   for (i=0; i<a->mbs; i++) {
1686     for (j=0; j<bs; j++) {
1687       for (k=a->i[i]; k<a->i[i+1]; k++) {
1688         for (l=0; l<bs; l++) {
1689           jj[count++] = bs*a->j[k] + l;
1690         }
1691       }
1692     }
1693   }
1694   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1695   ierr = PetscFree(jj);CHKERRQ(ierr);
1696 
1697   /* store nonzero values */
1698   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1699   count = 0;
1700   for (i=0; i<a->mbs; i++) {
1701     for (j=0; j<bs; j++) {
1702       for (k=a->i[i]; k<a->i[i+1]; k++) {
1703         for (l=0; l<bs; l++) {
1704           aa[count++] = a->a[bs2*k + l*bs + j];
1705         }
1706       }
1707     }
1708   }
1709   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1710   ierr = PetscFree(aa);CHKERRQ(ierr);
1711 
1712   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1713   if (file) {
1714     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1715   }
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1721 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1722 {
1723   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1724   PetscErrorCode    ierr;
1725   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1726   PetscViewerFormat format;
1727 
1728   PetscFunctionBegin;
1729   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1730   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1731     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1732   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1733     Mat aij;
1734     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1735     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1736     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1737   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1738      PetscFunctionReturn(0);
1739   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1740     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1741     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1742     for (i=0; i<a->mbs; i++) {
1743       for (j=0; j<bs; j++) {
1744         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1745         for (k=a->i[i]; k<a->i[i+1]; k++) {
1746           for (l=0; l<bs; l++) {
1747 #if defined(PETSC_USE_COMPLEX)
1748             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1749               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1750                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1751             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1752               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1753                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1754             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1755               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1756             }
1757 #else
1758             if (a->a[bs2*k + l*bs + j] != 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1760             }
1761 #endif
1762           }
1763         }
1764         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1765       }
1766     }
1767     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1768   } else {
1769     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1770     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1771     for (i=0; i<a->mbs; i++) {
1772       for (j=0; j<bs; j++) {
1773         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1774         for (k=a->i[i]; k<a->i[i+1]; k++) {
1775           for (l=0; l<bs; l++) {
1776 #if defined(PETSC_USE_COMPLEX)
1777             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1778               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1779                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1780             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1781               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1782                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1783             } else {
1784               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1785             }
1786 #else
1787             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1788 #endif
1789           }
1790         }
1791         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1792       }
1793     }
1794     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1795   }
1796   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1802 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1803 {
1804   Mat            A = (Mat) Aa;
1805   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1806   PetscErrorCode ierr;
1807   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1808   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1809   MatScalar      *aa;
1810   PetscViewer    viewer;
1811 
1812   PetscFunctionBegin;
1813 
1814   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1815   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1816 
1817   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1818 
1819   /* loop over matrix elements drawing boxes */
1820   color = PETSC_DRAW_BLUE;
1821   for (i=0,row=0; i<mbs; i++,row+=bs) {
1822     for (j=a->i[i]; j<a->i[i+1]; j++) {
1823       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1824       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1825       aa = a->a + j*bs2;
1826       for (k=0; k<bs; k++) {
1827         for (l=0; l<bs; l++) {
1828           if (PetscRealPart(*aa++) >=  0.) continue;
1829           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1830         }
1831       }
1832     }
1833   }
1834   color = PETSC_DRAW_CYAN;
1835   for (i=0,row=0; i<mbs; i++,row+=bs) {
1836     for (j=a->i[i]; j<a->i[i+1]; j++) {
1837       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1838       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1839       aa = a->a + j*bs2;
1840       for (k=0; k<bs; k++) {
1841         for (l=0; l<bs; l++) {
1842           if (PetscRealPart(*aa++) != 0.) continue;
1843           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1844         }
1845       }
1846     }
1847   }
1848 
1849   color = PETSC_DRAW_RED;
1850   for (i=0,row=0; i<mbs; i++,row+=bs) {
1851     for (j=a->i[i]; j<a->i[i+1]; j++) {
1852       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1853       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1854       aa = a->a + j*bs2;
1855       for (k=0; k<bs; k++) {
1856         for (l=0; l<bs; l++) {
1857           if (PetscRealPart(*aa++) <= 0.) continue;
1858           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1859         }
1860       }
1861     }
1862   }
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1868 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1869 {
1870   PetscErrorCode ierr;
1871   PetscReal      xl,yl,xr,yr,w,h;
1872   PetscDraw      draw;
1873   PetscBool      isnull;
1874 
1875   PetscFunctionBegin;
1876 
1877   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1878   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1879 
1880   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1881   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1882   xr += w;    yr += h;  xl = -w;     yl = -h;
1883   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1884   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1885   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatView_SeqBAIJ"
1891 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1892 {
1893   PetscErrorCode ierr;
1894   PetscBool      iascii,isbinary,isdraw;
1895 
1896   PetscFunctionBegin;
1897   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1898   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1899   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1900   if (iascii){
1901     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1902   } else if (isbinary) {
1903     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1904   } else if (isdraw) {
1905     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1906   } else {
1907     Mat B;
1908     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1909     ierr = MatView(B,viewer);CHKERRQ(ierr);
1910     ierr = MatDestroy(&B);CHKERRQ(ierr);
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1918 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1919 {
1920   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1921   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1922   PetscInt    *ai = a->i,*ailen = a->ilen;
1923   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1924   MatScalar   *ap,*aa = a->a;
1925 
1926   PetscFunctionBegin;
1927   for (k=0; k<m; k++) { /* loop over rows */
1928     row  = im[k]; brow = row/bs;
1929     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1930     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1931     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1932     nrow = ailen[brow];
1933     for (l=0; l<n; l++) { /* loop over columns */
1934       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1935       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1936       col  = in[l] ;
1937       bcol = col/bs;
1938       cidx = col%bs;
1939       ridx = row%bs;
1940       high = nrow;
1941       low  = 0; /* assume unsorted */
1942       while (high-low > 5) {
1943         t = (low+high)/2;
1944         if (rp[t] > bcol) high = t;
1945         else             low  = t;
1946       }
1947       for (i=low; i<high; i++) {
1948         if (rp[i] > bcol) break;
1949         if (rp[i] == bcol) {
1950           *v++ = ap[bs2*i+bs*cidx+ridx];
1951           goto finished;
1952         }
1953       }
1954       *v++ = 0.0;
1955       finished:;
1956     }
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1963 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1964 {
1965   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1966   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1967   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1968   PetscErrorCode    ierr;
1969   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1970   PetscBool         roworiented=a->roworiented;
1971   const PetscScalar *value = v;
1972   MatScalar         *ap,*aa = a->a,*bap;
1973 
1974   PetscFunctionBegin;
1975   if (roworiented) {
1976     stepval = (n-1)*bs;
1977   } else {
1978     stepval = (m-1)*bs;
1979   }
1980   for (k=0; k<m; k++) { /* loop over added rows */
1981     row  = im[k];
1982     if (row < 0) continue;
1983 #if defined(PETSC_USE_DEBUG)
1984     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1985 #endif
1986     rp   = aj + ai[row];
1987     ap   = aa + bs2*ai[row];
1988     rmax = imax[row];
1989     nrow = ailen[row];
1990     low  = 0;
1991     high = nrow;
1992     for (l=0; l<n; l++) { /* loop over added columns */
1993       if (in[l] < 0) continue;
1994 #if defined(PETSC_USE_DEBUG)
1995       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1996 #endif
1997       col = in[l];
1998       if (roworiented) {
1999         value = v + (k*(stepval+bs) + l)*bs;
2000       } else {
2001         value = v + (l*(stepval+bs) + k)*bs;
2002       }
2003       if (col <= lastcol) low = 0; else high = nrow;
2004       lastcol = col;
2005       while (high-low > 7) {
2006         t = (low+high)/2;
2007         if (rp[t] > col) high = t;
2008         else             low  = t;
2009       }
2010       for (i=low; i<high; i++) {
2011         if (rp[i] > col) break;
2012         if (rp[i] == col) {
2013           bap  = ap +  bs2*i;
2014           if (roworiented) {
2015             if (is == ADD_VALUES) {
2016               for (ii=0; ii<bs; ii++,value+=stepval) {
2017                 for (jj=ii; jj<bs2; jj+=bs) {
2018                   bap[jj] += *value++;
2019                 }
2020               }
2021             } else {
2022               for (ii=0; ii<bs; ii++,value+=stepval) {
2023                 for (jj=ii; jj<bs2; jj+=bs) {
2024                   bap[jj] = *value++;
2025                 }
2026               }
2027             }
2028           } else {
2029             if (is == ADD_VALUES) {
2030               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2031                 for (jj=0; jj<bs; jj++) {
2032                   bap[jj] += value[jj];
2033                 }
2034                 bap += bs;
2035               }
2036             } else {
2037               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2038                 for (jj=0; jj<bs; jj++) {
2039                   bap[jj]  = value[jj];
2040                 }
2041                 bap += bs;
2042               }
2043             }
2044           }
2045           goto noinsert2;
2046         }
2047       }
2048       if (nonew == 1) goto noinsert2;
2049       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2050       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2051       N = nrow++ - 1; high++;
2052       /* shift up all the later entries in this row */
2053       for (ii=N; ii>=i; ii--) {
2054         rp[ii+1] = rp[ii];
2055         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2056       }
2057       if (N >= i) {
2058         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2059       }
2060       rp[i] = col;
2061       bap   = ap +  bs2*i;
2062       if (roworiented) {
2063         for (ii=0; ii<bs; ii++,value+=stepval) {
2064           for (jj=ii; jj<bs2; jj+=bs) {
2065             bap[jj] = *value++;
2066           }
2067         }
2068       } else {
2069         for (ii=0; ii<bs; ii++,value+=stepval) {
2070           for (jj=0; jj<bs; jj++) {
2071             *bap++  = *value++;
2072           }
2073         }
2074       }
2075       noinsert2:;
2076       low = i;
2077     }
2078     ailen[row] = nrow;
2079   }
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2085 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2086 {
2087   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2088   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2089   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2090   PetscErrorCode ierr;
2091   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2092   MatScalar      *aa = a->a,*ap;
2093   PetscReal      ratio=0.6;
2094 
2095   PetscFunctionBegin;
2096   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2097 
2098   if (m) rmax = ailen[0];
2099   for (i=1; i<mbs; i++) {
2100     /* move each row back by the amount of empty slots (fshift) before it*/
2101     fshift += imax[i-1] - ailen[i-1];
2102     rmax   = PetscMax(rmax,ailen[i]);
2103     if (fshift) {
2104       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2105       N = ailen[i];
2106       for (j=0; j<N; j++) {
2107         ip[j-fshift] = ip[j];
2108         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2109       }
2110     }
2111     ai[i] = ai[i-1] + ailen[i-1];
2112   }
2113   if (mbs) {
2114     fshift += imax[mbs-1] - ailen[mbs-1];
2115     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2116   }
2117   /* reset ilen and imax for each row */
2118   for (i=0; i<mbs; i++) {
2119     ailen[i] = imax[i] = ai[i+1] - ai[i];
2120   }
2121   a->nz = ai[mbs];
2122 
2123   /* diagonals may have moved, so kill the diagonal pointers */
2124   a->idiagvalid = PETSC_FALSE;
2125   if (fshift && a->diag) {
2126     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2127     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2128     a->diag = 0;
2129   }
2130   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2131   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2132   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2133   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2134   A->info.mallocs     += a->reallocs;
2135   a->reallocs          = 0;
2136   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2137 
2138   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2139   A->same_nonzero = PETSC_TRUE;
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 /*
2144    This function returns an array of flags which indicate the locations of contiguous
2145    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2146    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2147    Assume: sizes should be long enough to hold all the values.
2148 */
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2151 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2152 {
2153   PetscInt   i,j,k,row;
2154   PetscBool  flg;
2155 
2156   PetscFunctionBegin;
2157   for (i=0,j=0; i<n; j++) {
2158     row = idx[i];
2159     if (row%bs!=0) { /* Not the begining of a block */
2160       sizes[j] = 1;
2161       i++;
2162     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2163       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2164       i++;
2165     } else { /* Begining of the block, so check if the complete block exists */
2166       flg = PETSC_TRUE;
2167       for (k=1; k<bs; k++) {
2168         if (row+k != idx[i+k]) { /* break in the block */
2169           flg = PETSC_FALSE;
2170           break;
2171         }
2172       }
2173       if (flg) { /* No break in the bs */
2174         sizes[j] = bs;
2175         i+= bs;
2176       } else {
2177         sizes[j] = 1;
2178         i++;
2179       }
2180     }
2181   }
2182   *bs_max = j;
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2188 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2189 {
2190   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2191   PetscErrorCode    ierr;
2192   PetscInt          i,j,k,count,*rows;
2193   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2194   PetscScalar       zero = 0.0;
2195   MatScalar         *aa;
2196   const PetscScalar *xx;
2197   PetscScalar       *bb;
2198 
2199   PetscFunctionBegin;
2200   /* fix right hand side if needed */
2201   if (x && b) {
2202     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2203     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2204     for (i=0; i<is_n; i++) {
2205       bb[is_idx[i]] = diag*xx[is_idx[i]];
2206     }
2207     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2208     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2209   }
2210 
2211   /* Make a copy of the IS and  sort it */
2212   /* allocate memory for rows,sizes */
2213   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2214 
2215   /* copy IS values to rows, and sort them */
2216   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2217   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2218 
2219   if (baij->keepnonzeropattern) {
2220     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2221     bs_max = is_n;
2222     A->same_nonzero = PETSC_TRUE;
2223   } else {
2224     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2225     A->same_nonzero = PETSC_FALSE;
2226   }
2227 
2228   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2229     row   = rows[j];
2230     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2231     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2232     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2233     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2234       if (diag != 0.0) {
2235         if (baij->ilen[row/bs] > 0) {
2236           baij->ilen[row/bs]       = 1;
2237           baij->j[baij->i[row/bs]] = row/bs;
2238           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2239         }
2240         /* Now insert all the diagonal values for this bs */
2241         for (k=0; k<bs; k++) {
2242           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2243         }
2244       } else { /* (diag == 0.0) */
2245         baij->ilen[row/bs] = 0;
2246       } /* end (diag == 0.0) */
2247     } else { /* (sizes[i] != bs) */
2248 #if defined (PETSC_USE_DEBUG)
2249       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2250 #endif
2251       for (k=0; k<count; k++) {
2252         aa[0] =  zero;
2253         aa    += bs;
2254       }
2255       if (diag != 0.0) {
2256         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2257       }
2258     }
2259   }
2260 
2261   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2262   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2268 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2269 {
2270   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2271   PetscErrorCode    ierr;
2272   PetscInt          i,j,k,count;
2273   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2274   PetscScalar       zero = 0.0;
2275   MatScalar         *aa;
2276   const PetscScalar *xx;
2277   PetscScalar       *bb;
2278   PetscBool         *zeroed,vecs = PETSC_FALSE;
2279 
2280   PetscFunctionBegin;
2281   /* fix right hand side if needed */
2282   if (x && b) {
2283     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2284     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2285     vecs = PETSC_TRUE;
2286   }
2287   A->same_nonzero = PETSC_TRUE;
2288 
2289   /* zero the columns */
2290   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2291   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2292   for (i=0; i<is_n; i++) {
2293     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2294     zeroed[is_idx[i]] = PETSC_TRUE;
2295   }
2296   for (i=0; i<A->rmap->N; i++) {
2297     if (!zeroed[i]) {
2298       row = i/bs;
2299       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2300         for (k=0; k<bs; k++) {
2301           col = bs*baij->j[j] + k;
2302 	  if (zeroed[col]) {
2303 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2304             if (vecs) bb[i] -= aa[0]*xx[col];
2305             aa[0] = 0.0;
2306           }
2307         }
2308       }
2309     } else if (vecs) bb[i] = diag*xx[i];
2310   }
2311   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2312   if (vecs) {
2313     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2314     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2315   }
2316 
2317   /* zero the rows */
2318   for (i=0; i<is_n; i++) {
2319     row   = is_idx[i];
2320     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2321     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2322     for (k=0; k<count; k++) {
2323       aa[0] =  zero;
2324       aa    += bs;
2325     }
2326     if (diag != 0.0) {
2327       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2328     }
2329   }
2330   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2336 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2337 {
2338   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2339   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2340   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2341   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2342   PetscErrorCode ierr;
2343   PetscInt       ridx,cidx,bs2=a->bs2;
2344   PetscBool      roworiented=a->roworiented;
2345   MatScalar      *ap,value,*aa=a->a,*bap;
2346 
2347   PetscFunctionBegin;
2348   if (v) PetscValidScalarPointer(v,6);
2349   for (k=0; k<m; k++) { /* loop over added rows */
2350     row  = im[k];
2351     brow = row/bs;
2352     if (row < 0) continue;
2353 #if defined(PETSC_USE_DEBUG)
2354     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2355 #endif
2356     rp   = aj + ai[brow];
2357     ap   = aa + bs2*ai[brow];
2358     rmax = imax[brow];
2359     nrow = ailen[brow];
2360     low  = 0;
2361     high = nrow;
2362     for (l=0; l<n; l++) { /* loop over added columns */
2363       if (in[l] < 0) continue;
2364 #if defined(PETSC_USE_DEBUG)
2365       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2366 #endif
2367       col = in[l]; bcol = col/bs;
2368       ridx = row % bs; cidx = col % bs;
2369       if (roworiented) {
2370         value = v[l + k*n];
2371       } else {
2372         value = v[k + l*m];
2373       }
2374       if (col <= lastcol) low = 0; else high = nrow;
2375       lastcol = col;
2376       while (high-low > 7) {
2377         t = (low+high)/2;
2378         if (rp[t] > bcol) high = t;
2379         else              low  = t;
2380       }
2381       for (i=low; i<high; i++) {
2382         if (rp[i] > bcol) break;
2383         if (rp[i] == bcol) {
2384           bap  = ap +  bs2*i + bs*cidx + ridx;
2385           if (is == ADD_VALUES) *bap += value;
2386           else                  *bap  = value;
2387           goto noinsert1;
2388         }
2389       }
2390       if (nonew == 1) goto noinsert1;
2391       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2392       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2393       N = nrow++ - 1; high++;
2394       /* shift up all the later entries in this row */
2395       for (ii=N; ii>=i; ii--) {
2396         rp[ii+1] = rp[ii];
2397         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2398       }
2399       if (N>=i) {
2400         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2401       }
2402       rp[i]                      = bcol;
2403       ap[bs2*i + bs*cidx + ridx] = value;
2404       a->nz++;
2405       noinsert1:;
2406       low = i;
2407     }
2408     ailen[brow] = nrow;
2409   }
2410   A->same_nonzero = PETSC_FALSE;
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2416 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2417 {
2418   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2419   Mat            outA;
2420   PetscErrorCode ierr;
2421   PetscBool      row_identity,col_identity;
2422 
2423   PetscFunctionBegin;
2424   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2425   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2426   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2427   if (!row_identity || !col_identity) {
2428     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2429   }
2430 
2431   outA            = inA;
2432   inA->factortype = MAT_FACTOR_LU;
2433 
2434   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2435 
2436   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2437   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2438   a->row = row;
2439   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2440   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2441   a->col = col;
2442 
2443   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2444   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2445    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2446   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2447 
2448   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2449   if (!a->solve_work) {
2450     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2451     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2452   }
2453   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2454 
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 EXTERN_C_BEGIN
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2461 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2462 {
2463   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2464   PetscInt    i,nz,mbs;
2465 
2466   PetscFunctionBegin;
2467   nz  = baij->maxnz;
2468   mbs = baij->mbs;
2469   for (i=0; i<nz; i++) {
2470     baij->j[i] = indices[i];
2471   }
2472   baij->nz = nz;
2473   for (i=0; i<mbs; i++) {
2474     baij->ilen[i] = baij->imax[i];
2475   }
2476   PetscFunctionReturn(0);
2477 }
2478 EXTERN_C_END
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2482 /*@
2483     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2484        in the matrix.
2485 
2486   Input Parameters:
2487 +  mat - the SeqBAIJ matrix
2488 -  indices - the column indices
2489 
2490   Level: advanced
2491 
2492   Notes:
2493     This can be called if you have precomputed the nonzero structure of the
2494   matrix and want to provide it to the matrix object to improve the performance
2495   of the MatSetValues() operation.
2496 
2497     You MUST have set the correct numbers of nonzeros per row in the call to
2498   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2499 
2500     MUST be called before any calls to MatSetValues();
2501 
2502 @*/
2503 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2504 {
2505   PetscErrorCode ierr;
2506 
2507   PetscFunctionBegin;
2508   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2509   PetscValidPointer(indices,2);
2510   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2516 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2517 {
2518   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2519   PetscErrorCode ierr;
2520   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2521   PetscReal      atmp;
2522   PetscScalar    *x,zero = 0.0;
2523   MatScalar      *aa;
2524   PetscInt       ncols,brow,krow,kcol;
2525 
2526   PetscFunctionBegin;
2527   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2528   bs   = A->rmap->bs;
2529   aa   = a->a;
2530   ai   = a->i;
2531   aj   = a->j;
2532   mbs  = a->mbs;
2533 
2534   ierr = VecSet(v,zero);CHKERRQ(ierr);
2535   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2536   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2537   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2538   for (i=0; i<mbs; i++) {
2539     ncols = ai[1] - ai[0]; ai++;
2540     brow  = bs*i;
2541     for (j=0; j<ncols; j++){
2542       for (kcol=0; kcol<bs; kcol++){
2543         for (krow=0; krow<bs; krow++){
2544           atmp = PetscAbsScalar(*aa);aa++;
2545           row = brow + krow;    /* row index */
2546           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2547           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2548         }
2549       }
2550       aj++;
2551     }
2552   }
2553   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatCopy_SeqBAIJ"
2559 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2560 {
2561   PetscErrorCode ierr;
2562 
2563   PetscFunctionBegin;
2564   /* If the two matrices have the same copy implementation, use fast copy. */
2565   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2566     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2567     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2568     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2569 
2570     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2571     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2572     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2573   } else {
2574     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2575   }
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 #undef __FUNCT__
2580 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2581 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2582 {
2583   PetscErrorCode ierr;
2584 
2585   PetscFunctionBegin;
2586   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2592 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2593 {
2594   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2595   PetscFunctionBegin;
2596   *array = a->a;
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2602 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2603 {
2604   PetscFunctionBegin;
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2610 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2611 {
2612   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2613   PetscErrorCode ierr;
2614   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2615   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2616 
2617   PetscFunctionBegin;
2618   if (str == SAME_NONZERO_PATTERN) {
2619     PetscScalar alpha = a;
2620     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2621   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2622     if (y->xtoy && y->XtoY != X) {
2623       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2624       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2625     }
2626     if (!y->xtoy) { /* get xtoy */
2627       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2628       y->XtoY = X;
2629       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2630     }
2631     bs2 = bs*bs;
2632     for (i=0; i<x->nz; i++) {
2633       j = 0;
2634       while (j < bs2){
2635         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2636         j++;
2637       }
2638     }
2639     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2640   } else {
2641     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2642   }
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 #undef __FUNCT__
2647 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2648 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2649 {
2650   PetscInt rbs,cbs;
2651   PetscErrorCode ierr;
2652 
2653   PetscFunctionBegin;
2654   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2655   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2656   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2657   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2663 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2664 {
2665   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2666   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2667   MatScalar      *aa = a->a;
2668 
2669   PetscFunctionBegin;
2670   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2676 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2677 {
2678   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2679   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2680   MatScalar      *aa = a->a;
2681 
2682   PetscFunctionBegin;
2683   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2684   PetscFunctionReturn(0);
2685 }
2686 
2687 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2691 /*
2692     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2693 */
2694 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2695 {
2696   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2697   PetscErrorCode ierr;
2698   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2699   PetscInt       nz = a->i[m],row,*jj,mr,col;
2700 
2701   PetscFunctionBegin;
2702   *nn = n;
2703   if (!ia) PetscFunctionReturn(0);
2704   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2705   else {
2706     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2707     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2708     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2709     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2710     jj = a->j;
2711     for (i=0; i<nz; i++) {
2712       collengths[jj[i]]++;
2713     }
2714     cia[0] = oshift;
2715     for (i=0; i<n; i++) {
2716       cia[i+1] = cia[i] + collengths[i];
2717     }
2718     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2719     jj   = a->j;
2720     for (row=0; row<m; row++) {
2721       mr = a->i[row+1] - a->i[row];
2722       for (i=0; i<mr; i++) {
2723         col = *jj++;
2724         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2725       }
2726     }
2727     ierr = PetscFree(collengths);CHKERRQ(ierr);
2728     *ia = cia; *ja = cja;
2729   }
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2735 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2736 {
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   if (!ia) PetscFunctionReturn(0);
2741   ierr = PetscFree(*ia);CHKERRQ(ierr);
2742   ierr = PetscFree(*ja);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2748 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2749 {
2750   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2751   PetscErrorCode ierr;
2752   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2753   PetscScalar    dx,*y,*xx,*w3_array;
2754   PetscScalar    *vscale_array;
2755   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2756   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2757   void           *fctx = coloring->fctx;
2758   PetscBool      flg = PETSC_FALSE;
2759   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2760   Vec            x1_tmp;
2761 
2762   PetscFunctionBegin;
2763   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2764   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2765   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2766   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2767 
2768   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2769   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2770   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2771   if (flg) {
2772     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2773   } else {
2774     PetscBool  assembled;
2775     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2776     if (assembled) {
2777       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2778     }
2779   }
2780 
2781   x1_tmp = x1;
2782   if (!coloring->vscale){
2783     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2784   }
2785 
2786   /*
2787     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2788     coloring->F for the coarser grids from the finest
2789   */
2790   if (coloring->F) {
2791     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2792     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2793     if (m1 != m2) {
2794       coloring->F = 0;
2795       }
2796     }
2797 
2798   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2799     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2800   }
2801   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2802 
2803   /* Set w1 = F(x1) */
2804   if (coloring->F) {
2805     w1          = coloring->F; /* use already computed value of function */
2806     coloring->F = 0;
2807   } else {
2808     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2809     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2810     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2811   }
2812 
2813   if (!coloring->w3) {
2814     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2815     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2816   }
2817   w3 = coloring->w3;
2818 
2819     CHKMEMQ;
2820     /* Compute all the local scale factors, including ghost points */
2821   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2822   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2823   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2824   if (ctype == IS_COLORING_GHOSTED){
2825     col_start = 0; col_end = N;
2826   } else if (ctype == IS_COLORING_GLOBAL){
2827     xx = xx - start;
2828     vscale_array = vscale_array - start;
2829     col_start = start; col_end = N + start;
2830   }    CHKMEMQ;
2831   for (col=col_start; col<col_end; col++){
2832     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2833     if (coloring->htype[0] == 'w') {
2834       dx = 1.0 + unorm;
2835     } else {
2836       dx  = xx[col];
2837     }
2838     if (dx == 0.0) dx = 1.0;
2839 #if !defined(PETSC_USE_COMPLEX)
2840     if (dx < umin && dx >= 0.0)      dx = umin;
2841     else if (dx < 0.0 && dx > -umin) dx = -umin;
2842 #else
2843     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2844     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2845 #endif
2846     dx               *= epsilon;
2847     vscale_array[col] = 1.0/dx;
2848   }     CHKMEMQ;
2849   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2850   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2851   if (ctype == IS_COLORING_GLOBAL){
2852     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2853     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2854   }
2855   CHKMEMQ;
2856   if (coloring->vscaleforrow) {
2857     vscaleforrow = coloring->vscaleforrow;
2858   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2859 
2860   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2861   /*
2862     Loop over each color
2863   */
2864   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2865   for (k=0; k<coloring->ncolors; k++) {
2866     coloring->currentcolor = k;
2867     for (i=0; i<bs; i++) {
2868       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2869       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2870       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2871       /*
2872 	Loop over each column associated with color
2873 	adding the perturbation to the vector w3.
2874       */
2875       for (l=0; l<coloring->ncolumns[k]; l++) {
2876 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2877 	if (coloring->htype[0] == 'w') {
2878 	  dx = 1.0 + unorm;
2879 	} else {
2880 	  dx  = xx[col];
2881 	}
2882 	if (dx == 0.0) dx = 1.0;
2883 #if !defined(PETSC_USE_COMPLEX)
2884 	if (dx < umin && dx >= 0.0)      dx = umin;
2885 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2886 #else
2887 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2888 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2889 #endif
2890 	dx            *= epsilon;
2891 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2892 	w3_array[col] += dx;
2893       }
2894       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2895       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2896 
2897       /*
2898 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2899 	w2 = F(x1 + dx) - F(x1)
2900       */
2901       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2902       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2903       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2904       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2905 
2906       /*
2907 	Loop over rows of vector, putting results into Jacobian matrix
2908       */
2909       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2910       for (l=0; l<coloring->nrows[k]; l++) {
2911 	row    = bs*coloring->rows[k][l];             /* local row index */
2912 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2913         for (j=0; j<bs; j++) {
2914   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2915           srows[j]  = row + start + j;
2916         }
2917 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2918       }
2919       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2920     }
2921   } /* endof for each color */
2922   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2923   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2924   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2925   ierr = PetscFree(srows);CHKERRQ(ierr);
2926 
2927   coloring->currentcolor = -1;
2928   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2929   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2930   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 /* -------------------------------------------------------------------*/
2935 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2936        MatGetRow_SeqBAIJ,
2937        MatRestoreRow_SeqBAIJ,
2938        MatMult_SeqBAIJ_N,
2939 /* 4*/ MatMultAdd_SeqBAIJ_N,
2940        MatMultTranspose_SeqBAIJ,
2941        MatMultTransposeAdd_SeqBAIJ,
2942        0,
2943        0,
2944        0,
2945 /*10*/ 0,
2946        MatLUFactor_SeqBAIJ,
2947        0,
2948        0,
2949        MatTranspose_SeqBAIJ,
2950 /*15*/ MatGetInfo_SeqBAIJ,
2951        MatEqual_SeqBAIJ,
2952        MatGetDiagonal_SeqBAIJ,
2953        MatDiagonalScale_SeqBAIJ,
2954        MatNorm_SeqBAIJ,
2955 /*20*/ 0,
2956        MatAssemblyEnd_SeqBAIJ,
2957        MatSetOption_SeqBAIJ,
2958        MatZeroEntries_SeqBAIJ,
2959 /*24*/ MatZeroRows_SeqBAIJ,
2960        0,
2961        0,
2962        0,
2963        0,
2964 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2965        0,
2966        0,
2967        MatGetArray_SeqBAIJ,
2968        MatRestoreArray_SeqBAIJ,
2969 /*34*/ MatDuplicate_SeqBAIJ,
2970        0,
2971        0,
2972        MatILUFactor_SeqBAIJ,
2973        0,
2974 /*39*/ MatAXPY_SeqBAIJ,
2975        MatGetSubMatrices_SeqBAIJ,
2976        MatIncreaseOverlap_SeqBAIJ,
2977        MatGetValues_SeqBAIJ,
2978        MatCopy_SeqBAIJ,
2979 /*44*/ 0,
2980        MatScale_SeqBAIJ,
2981        0,
2982        0,
2983        MatZeroRowsColumns_SeqBAIJ,
2984 /*49*/ MatSetBlockSize_SeqBAIJ,
2985        MatGetRowIJ_SeqBAIJ,
2986        MatRestoreRowIJ_SeqBAIJ,
2987        MatGetColumnIJ_SeqBAIJ,
2988        MatRestoreColumnIJ_SeqBAIJ,
2989 /*54*/ MatFDColoringCreate_SeqAIJ,
2990        0,
2991        0,
2992        0,
2993        MatSetValuesBlocked_SeqBAIJ,
2994 /*59*/ MatGetSubMatrix_SeqBAIJ,
2995        MatDestroy_SeqBAIJ,
2996        MatView_SeqBAIJ,
2997        0,
2998        0,
2999 /*64*/ 0,
3000        0,
3001        0,
3002        0,
3003        0,
3004 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
3005        0,
3006        MatConvert_Basic,
3007        0,
3008        0,
3009 /*74*/ 0,
3010        MatFDColoringApply_BAIJ,
3011        0,
3012        0,
3013        0,
3014 /*79*/ 0,
3015        0,
3016        0,
3017        0,
3018        MatLoad_SeqBAIJ,
3019 /*84*/ 0,
3020        0,
3021        0,
3022        0,
3023        0,
3024 /*89*/ 0,
3025        0,
3026        0,
3027        0,
3028        0,
3029 /*94*/ 0,
3030        0,
3031        0,
3032        0,
3033        0,
3034 /*99*/0,
3035        0,
3036        0,
3037        0,
3038        0,
3039 /*104*/0,
3040        MatRealPart_SeqBAIJ,
3041        MatImaginaryPart_SeqBAIJ,
3042        0,
3043        0,
3044 /*109*/0,
3045        0,
3046        0,
3047        0,
3048        MatMissingDiagonal_SeqBAIJ,
3049 /*114*/0,
3050        0,
3051        0,
3052        0,
3053        0,
3054 /*119*/0,
3055        0,
3056        MatMultHermitianTranspose_SeqBAIJ,
3057        MatMultHermitianTransposeAdd_SeqBAIJ,
3058        0,
3059 };
3060 
3061 EXTERN_C_BEGIN
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3064 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3065 {
3066   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3067   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3068   PetscErrorCode ierr;
3069 
3070   PetscFunctionBegin;
3071   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3072 
3073   /* allocate space for values if not already there */
3074   if (!aij->saved_values) {
3075     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3076     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3077   }
3078 
3079   /* copy values over */
3080   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3081   PetscFunctionReturn(0);
3082 }
3083 EXTERN_C_END
3084 
3085 EXTERN_C_BEGIN
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3088 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3089 {
3090   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3091   PetscErrorCode ierr;
3092   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3093 
3094   PetscFunctionBegin;
3095   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3096   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3097 
3098   /* copy values over */
3099   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3100   PetscFunctionReturn(0);
3101 }
3102 EXTERN_C_END
3103 
3104 EXTERN_C_BEGIN
3105 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3106 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3107 EXTERN_C_END
3108 
3109 EXTERN_C_BEGIN
3110 #undef __FUNCT__
3111 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3112 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3113 {
3114   Mat_SeqBAIJ    *b;
3115   PetscErrorCode ierr;
3116   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3117   PetscBool      flg,skipallocation = PETSC_FALSE;
3118 
3119   PetscFunctionBegin;
3120 
3121   if (nz == MAT_SKIP_ALLOCATION) {
3122     skipallocation = PETSC_TRUE;
3123     nz             = 0;
3124   }
3125 
3126   if (bs < 0) {
3127     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3128       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3129     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3130     bs   = PetscAbs(bs);
3131   }
3132   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3133   bs   = newbs;
3134 
3135   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3136   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3137   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3138   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3139 
3140   B->preallocated = PETSC_TRUE;
3141 
3142   mbs  = B->rmap->n/bs;
3143   nbs  = B->cmap->n/bs;
3144   bs2  = bs*bs;
3145 
3146   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3147 
3148   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3149   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3150   if (nnz) {
3151     for (i=0; i<mbs; i++) {
3152       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3153       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3154     }
3155   }
3156 
3157   b       = (Mat_SeqBAIJ*)B->data;
3158   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3159     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3160   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3161 
3162   if (!flg) {
3163     switch (bs) {
3164     case 1:
3165       B->ops->mult            = MatMult_SeqBAIJ_1;
3166       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3167       B->ops->sor             = MatSOR_SeqBAIJ_1;
3168       break;
3169     case 2:
3170       B->ops->mult            = MatMult_SeqBAIJ_2;
3171       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3172       B->ops->sor             = MatSOR_SeqBAIJ_2;
3173       break;
3174     case 3:
3175       B->ops->mult            = MatMult_SeqBAIJ_3;
3176       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3177       B->ops->sor             = MatSOR_SeqBAIJ_3;
3178       break;
3179     case 4:
3180       B->ops->mult            = MatMult_SeqBAIJ_4;
3181       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3182       B->ops->sor             = MatSOR_SeqBAIJ_4;
3183       break;
3184     case 5:
3185       B->ops->mult            = MatMult_SeqBAIJ_5;
3186       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3187       B->ops->sor             = MatSOR_SeqBAIJ_5;
3188       break;
3189     case 6:
3190       B->ops->mult            = MatMult_SeqBAIJ_6;
3191       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3192       B->ops->sor             = MatSOR_SeqBAIJ_6;
3193       break;
3194     case 7:
3195       B->ops->mult            = MatMult_SeqBAIJ_7;
3196       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3197       B->ops->sor             = MatSOR_SeqBAIJ_7;
3198       break;
3199     case 15:
3200       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3201       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3202       B->ops->sor             = MatSOR_SeqBAIJ_N;
3203       break;
3204     default:
3205       B->ops->mult            = MatMult_SeqBAIJ_N;
3206       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3207       B->ops->sor             = MatSOR_SeqBAIJ_N;
3208       break;
3209     }
3210   }
3211   B->rmap->bs  = bs;
3212   b->mbs       = mbs;
3213   b->nbs       = nbs;
3214   if (!skipallocation) {
3215     if (!b->imax) {
3216       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3217       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3218       b->free_imax_ilen = PETSC_TRUE;
3219     }
3220     /* b->ilen will count nonzeros in each block row so far. */
3221     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3222     if (!nnz) {
3223       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3224       else if (nz < 0) nz = 1;
3225       for (i=0; i<mbs; i++) b->imax[i] = nz;
3226       nz = nz*mbs;
3227     } else {
3228       nz = 0;
3229       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3230     }
3231 
3232     /* allocate the matrix space */
3233     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3234     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3235     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3236     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3237     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3238     b->singlemalloc = PETSC_TRUE;
3239     b->i[0] = 0;
3240     for (i=1; i<mbs+1; i++) {
3241       b->i[i] = b->i[i-1] + b->imax[i-1];
3242     }
3243     b->free_a     = PETSC_TRUE;
3244     b->free_ij    = PETSC_TRUE;
3245   } else {
3246     b->free_a     = PETSC_FALSE;
3247     b->free_ij    = PETSC_FALSE;
3248   }
3249 
3250   B->rmap->bs          = bs;
3251   b->bs2              = bs2;
3252   b->mbs              = mbs;
3253   b->nz               = 0;
3254   b->maxnz            = nz;
3255   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3256   PetscFunctionReturn(0);
3257 }
3258 EXTERN_C_END
3259 
3260 EXTERN_C_BEGIN
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3263 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3264 {
3265   PetscInt       i,m,nz,nz_max=0,*nnz;
3266   PetscScalar    *values=0;
3267   PetscErrorCode ierr;
3268 
3269   PetscFunctionBegin;
3270   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3271   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3272   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3273   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3274   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3275   m = B->rmap->n/bs;
3276 
3277   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3278   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3279   for(i=0; i<m; i++) {
3280     nz = ii[i+1]- ii[i];
3281     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3282     nz_max = PetscMax(nz_max, nz);
3283     nnz[i] = nz;
3284   }
3285   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3286   ierr = PetscFree(nnz);CHKERRQ(ierr);
3287 
3288   values = (PetscScalar*)V;
3289   if (!values) {
3290     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3291     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3292   }
3293   for (i=0; i<m; i++) {
3294     PetscInt          ncols  = ii[i+1] - ii[i];
3295     const PetscInt    *icols = jj + ii[i];
3296     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3297     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3298   }
3299   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3300   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3302 
3303   PetscFunctionReturn(0);
3304 }
3305 EXTERN_C_END
3306 
3307 
3308 EXTERN_C_BEGIN
3309 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3310 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3311 #if defined(PETSC_HAVE_MUMPS)
3312 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3313 #endif
3314 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3315 EXTERN_C_END
3316 
3317 /*MC
3318    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3319    block sparse compressed row format.
3320 
3321    Options Database Keys:
3322 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3323 
3324   Level: beginner
3325 
3326 .seealso: MatCreateSeqBAIJ()
3327 M*/
3328 
3329 EXTERN_C_BEGIN
3330 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3331 EXTERN_C_END
3332 
3333 EXTERN_C_BEGIN
3334 #undef __FUNCT__
3335 #define __FUNCT__ "MatCreate_SeqBAIJ"
3336 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3337 {
3338   PetscErrorCode ierr;
3339   PetscMPIInt    size;
3340   Mat_SeqBAIJ    *b;
3341 
3342   PetscFunctionBegin;
3343   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3344   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3345 
3346   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3347   B->data = (void*)b;
3348   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3349   b->row                   = 0;
3350   b->col                   = 0;
3351   b->icol                  = 0;
3352   b->reallocs              = 0;
3353   b->saved_values          = 0;
3354 
3355   b->roworiented           = PETSC_TRUE;
3356   b->nonew                 = 0;
3357   b->diag                  = 0;
3358   b->solve_work            = 0;
3359   b->mult_work             = 0;
3360   B->spptr                 = 0;
3361   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3362   b->keepnonzeropattern    = PETSC_FALSE;
3363   b->xtoy                  = 0;
3364   b->XtoY                  = 0;
3365   B->same_nonzero          = PETSC_FALSE;
3366 
3367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3368                                      "MatGetFactorAvailable_seqbaij_petsc",
3369                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3371                                      "MatGetFactor_seqbaij_petsc",
3372                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3374                                      "MatGetFactor_seqbaij_bstrm",
3375                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3376 #if defined(PETSC_HAVE_MUMPS)
3377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3378 #endif
3379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3380                                      "MatInvertBlockDiagonal_SeqBAIJ",
3381                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3383                                      "MatStoreValues_SeqBAIJ",
3384                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3386                                      "MatRetrieveValues_SeqBAIJ",
3387                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3389                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3390                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3391   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3392                                      "MatConvert_SeqBAIJ_SeqAIJ",
3393                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3395                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3396                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3397   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3398                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3399                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3401                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3402                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3404                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3405                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3407                                      "MatIsTranspose_SeqBAIJ",
3408                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3409   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
3412 EXTERN_C_END
3413 
3414 #undef __FUNCT__
3415 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3416 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3417 {
3418   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3419   PetscErrorCode ierr;
3420   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3421 
3422   PetscFunctionBegin;
3423   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3424 
3425   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3426     c->imax = a->imax;
3427     c->ilen = a->ilen;
3428     c->free_imax_ilen = PETSC_FALSE;
3429   } else {
3430     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3431     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3432     for (i=0; i<mbs; i++) {
3433       c->imax[i] = a->imax[i];
3434       c->ilen[i] = a->ilen[i];
3435     }
3436     c->free_imax_ilen = PETSC_TRUE;
3437   }
3438 
3439   /* allocate the matrix space */
3440   if (mallocmatspace){
3441     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3442       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3443       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3444       c->singlemalloc = PETSC_FALSE;
3445       c->free_ij      = PETSC_FALSE;
3446       c->i            = a->i;
3447       c->j            = a->j;
3448       c->parent       = A;
3449       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3450       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3451       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3452     } else {
3453       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3454       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3455       c->singlemalloc = PETSC_TRUE;
3456       c->free_ij      = PETSC_TRUE;
3457       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3458       if (mbs > 0) {
3459 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3460 	if (cpvalues == MAT_COPY_VALUES) {
3461 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3462 	} else {
3463 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3464 	}
3465       }
3466     }
3467   }
3468 
3469   c->roworiented = a->roworiented;
3470   c->nonew       = a->nonew;
3471   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3472   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3473   c->bs2         = a->bs2;
3474   c->mbs         = a->mbs;
3475   c->nbs         = a->nbs;
3476 
3477   if (a->diag) {
3478     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3479       c->diag      = a->diag;
3480       c->free_diag = PETSC_FALSE;
3481     } else {
3482       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3483       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3484       for (i=0; i<mbs; i++) {
3485         c->diag[i] = a->diag[i];
3486       }
3487       c->free_diag = PETSC_TRUE;
3488     }
3489   } else c->diag        = 0;
3490   c->nz                 = a->nz;
3491   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3492   c->solve_work         = 0;
3493   c->mult_work          = 0;
3494   c->free_a             = PETSC_TRUE;
3495   c->free_ij            = PETSC_TRUE;
3496   C->preallocated       = PETSC_TRUE;
3497   C->assembled          = PETSC_TRUE;
3498 
3499   c->compressedrow.use     = a->compressedrow.use;
3500   c->compressedrow.nrows   = a->compressedrow.nrows;
3501   c->compressedrow.check   = a->compressedrow.check;
3502   if (a->compressedrow.use){
3503     i = a->compressedrow.nrows;
3504     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3505     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3506     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3507     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3508   } else {
3509     c->compressedrow.use    = PETSC_FALSE;
3510     c->compressedrow.i      = PETSC_NULL;
3511     c->compressedrow.rindex = PETSC_NULL;
3512   }
3513   C->same_nonzero = A->same_nonzero;
3514   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3515   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 #undef __FUNCT__
3520 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3521 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3522 {
3523     PetscErrorCode ierr;
3524 
3525   PetscFunctionBegin;
3526   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3527   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3528   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3529   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "MatLoad_SeqBAIJ"
3535 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3536 {
3537   Mat_SeqBAIJ    *a;
3538   PetscErrorCode ierr;
3539   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3540   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3541   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3542   PetscInt       *masked,nmask,tmp,bs2,ishift;
3543   PetscMPIInt    size;
3544   int            fd;
3545   PetscScalar    *aa;
3546   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3547 
3548   PetscFunctionBegin;
3549   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3550   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3551   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3552   bs2  = bs*bs;
3553 
3554   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3555   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3556   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3557   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3558   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3559   M = header[1]; N = header[2]; nz = header[3];
3560 
3561   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3562   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3563 
3564   /*
3565      This code adds extra rows to make sure the number of rows is
3566     divisible by the blocksize
3567   */
3568   mbs        = M/bs;
3569   extra_rows = bs - M + bs*(mbs);
3570   if (extra_rows == bs) extra_rows = 0;
3571   else                  mbs++;
3572   if (extra_rows) {
3573     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3574   }
3575 
3576   /* Set global sizes if not already set */
3577   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3578     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3579   } else { /* Check if the matrix global sizes are correct */
3580     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3581     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3582   }
3583 
3584   /* read in row lengths */
3585   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3586   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3587   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3588 
3589   /* read in column indices */
3590   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3591   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3592   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3593 
3594   /* loop over row lengths determining block row lengths */
3595   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3596   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3597   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3598   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3599   rowcount = 0;
3600   nzcount = 0;
3601   for (i=0; i<mbs; i++) {
3602     nmask = 0;
3603     for (j=0; j<bs; j++) {
3604       kmax = rowlengths[rowcount];
3605       for (k=0; k<kmax; k++) {
3606         tmp = jj[nzcount++]/bs;
3607         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3608       }
3609       rowcount++;
3610     }
3611     browlengths[i] += nmask;
3612     /* zero out the mask elements we set */
3613     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3614   }
3615 
3616   /* Do preallocation  */
3617   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3618   a = (Mat_SeqBAIJ*)newmat->data;
3619 
3620   /* set matrix "i" values */
3621   a->i[0] = 0;
3622   for (i=1; i<= mbs; i++) {
3623     a->i[i]      = a->i[i-1] + browlengths[i-1];
3624     a->ilen[i-1] = browlengths[i-1];
3625   }
3626   a->nz         = 0;
3627   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3628 
3629   /* read in nonzero values */
3630   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3631   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3632   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3633 
3634   /* set "a" and "j" values into matrix */
3635   nzcount = 0; jcount = 0;
3636   for (i=0; i<mbs; i++) {
3637     nzcountb = nzcount;
3638     nmask    = 0;
3639     for (j=0; j<bs; j++) {
3640       kmax = rowlengths[i*bs+j];
3641       for (k=0; k<kmax; k++) {
3642         tmp = jj[nzcount++]/bs;
3643 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3644       }
3645     }
3646     /* sort the masked values */
3647     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3648 
3649     /* set "j" values into matrix */
3650     maskcount = 1;
3651     for (j=0; j<nmask; j++) {
3652       a->j[jcount++]  = masked[j];
3653       mask[masked[j]] = maskcount++;
3654     }
3655     /* set "a" values into matrix */
3656     ishift = bs2*a->i[i];
3657     for (j=0; j<bs; j++) {
3658       kmax = rowlengths[i*bs+j];
3659       for (k=0; k<kmax; k++) {
3660         tmp       = jj[nzcountb]/bs ;
3661         block     = mask[tmp] - 1;
3662         point     = jj[nzcountb] - bs*tmp;
3663         idx       = ishift + bs2*block + j + bs*point;
3664         a->a[idx] = (MatScalar)aa[nzcountb++];
3665       }
3666     }
3667     /* zero out the mask elements we set */
3668     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3669   }
3670   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3671 
3672   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3673   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3674   ierr = PetscFree(aa);CHKERRQ(ierr);
3675   ierr = PetscFree(jj);CHKERRQ(ierr);
3676   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3677 
3678   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3679   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3680   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3681   PetscFunctionReturn(0);
3682 }
3683 
3684 #undef __FUNCT__
3685 #define __FUNCT__ "MatCreateSeqBAIJ"
3686 /*@C
3687    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3688    compressed row) format.  For good matrix assembly performance the
3689    user should preallocate the matrix storage by setting the parameter nz
3690    (or the array nnz).  By setting these parameters accurately, performance
3691    during matrix assembly can be increased by more than a factor of 50.
3692 
3693    Collective on MPI_Comm
3694 
3695    Input Parameters:
3696 +  comm - MPI communicator, set to PETSC_COMM_SELF
3697 .  bs - size of block
3698 .  m - number of rows
3699 .  n - number of columns
3700 .  nz - number of nonzero blocks  per block row (same for all rows)
3701 -  nnz - array containing the number of nonzero blocks in the various block rows
3702          (possibly different for each block row) or PETSC_NULL
3703 
3704    Output Parameter:
3705 .  A - the matrix
3706 
3707    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3708    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3709    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3710 
3711    Options Database Keys:
3712 .   -mat_no_unroll - uses code that does not unroll the loops in the
3713                      block calculations (much slower)
3714 .    -mat_block_size - size of the blocks to use
3715 
3716    Level: intermediate
3717 
3718    Notes:
3719    The number of rows and columns must be divisible by blocksize.
3720 
3721    If the nnz parameter is given then the nz parameter is ignored
3722 
3723    A nonzero block is any block that as 1 or more nonzeros in it
3724 
3725    The block AIJ format is fully compatible with standard Fortran 77
3726    storage.  That is, the stored row and column indices can begin at
3727    either one (as in Fortran) or zero.  See the users' manual for details.
3728 
3729    Specify the preallocated storage with either nz or nnz (not both).
3730    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3731    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3732    matrices.
3733 
3734 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3735 @*/
3736 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3737 {
3738   PetscErrorCode ierr;
3739 
3740   PetscFunctionBegin;
3741   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3742   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3743   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3744   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 #undef __FUNCT__
3749 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3750 /*@C
3751    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3752    per row in the matrix. For good matrix assembly performance the
3753    user should preallocate the matrix storage by setting the parameter nz
3754    (or the array nnz).  By setting these parameters accurately, performance
3755    during matrix assembly can be increased by more than a factor of 50.
3756 
3757    Collective on MPI_Comm
3758 
3759    Input Parameters:
3760 +  A - the matrix
3761 .  bs - size of block
3762 .  nz - number of block nonzeros per block row (same for all rows)
3763 -  nnz - array containing the number of block nonzeros in the various block rows
3764          (possibly different for each block row) or PETSC_NULL
3765 
3766    Options Database Keys:
3767 .   -mat_no_unroll - uses code that does not unroll the loops in the
3768                      block calculations (much slower)
3769 .    -mat_block_size - size of the blocks to use
3770 
3771    Level: intermediate
3772 
3773    Notes:
3774    If the nnz parameter is given then the nz parameter is ignored
3775 
3776    You can call MatGetInfo() to get information on how effective the preallocation was;
3777    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3778    You can also run with the option -info and look for messages with the string
3779    malloc in them to see if additional memory allocation was needed.
3780 
3781    The block AIJ format is fully compatible with standard Fortran 77
3782    storage.  That is, the stored row and column indices can begin at
3783    either one (as in Fortran) or zero.  See the users' manual for details.
3784 
3785    Specify the preallocated storage with either nz or nnz (not both).
3786    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3787    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3788 
3789 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3790 @*/
3791 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3792 {
3793   PetscErrorCode ierr;
3794 
3795   PetscFunctionBegin;
3796   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3797   PetscFunctionReturn(0);
3798 }
3799 
3800 #undef __FUNCT__
3801 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3802 /*@C
3803    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3804    (the default sequential PETSc format).
3805 
3806    Collective on MPI_Comm
3807 
3808    Input Parameters:
3809 +  A - the matrix
3810 .  i - the indices into j for the start of each local row (starts with zero)
3811 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3812 -  v - optional values in the matrix
3813 
3814    Level: developer
3815 
3816 .keywords: matrix, aij, compressed row, sparse
3817 
3818 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3819 @*/
3820 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3821 {
3822   PetscErrorCode ierr;
3823 
3824   PetscFunctionBegin;
3825   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3826   PetscFunctionReturn(0);
3827 }
3828 
3829 
3830 #undef __FUNCT__
3831 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3832 /*@
3833      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3834 
3835      Collective on MPI_Comm
3836 
3837    Input Parameters:
3838 +  comm - must be an MPI communicator of size 1
3839 .  bs - size of block
3840 .  m - number of rows
3841 .  n - number of columns
3842 .  i - row indices
3843 .  j - column indices
3844 -  a - matrix values
3845 
3846    Output Parameter:
3847 .  mat - the matrix
3848 
3849    Level: advanced
3850 
3851    Notes:
3852        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3853     once the matrix is destroyed
3854 
3855        You cannot set new nonzero locations into this matrix, that will generate an error.
3856 
3857        The i and j indices are 0 based
3858 
3859        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3860 
3861 
3862 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3863 
3864 @*/
3865 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3866 {
3867   PetscErrorCode ierr;
3868   PetscInt       ii;
3869   Mat_SeqBAIJ    *baij;
3870 
3871   PetscFunctionBegin;
3872   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3873   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3874 
3875   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3876   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3877   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3878   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3879   baij = (Mat_SeqBAIJ*)(*mat)->data;
3880   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3881   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3882 
3883   baij->i = i;
3884   baij->j = j;
3885   baij->a = a;
3886   baij->singlemalloc = PETSC_FALSE;
3887   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3888   baij->free_a       = PETSC_FALSE;
3889   baij->free_ij      = PETSC_FALSE;
3890 
3891   for (ii=0; ii<m; ii++) {
3892     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3893 #if defined(PETSC_USE_DEBUG)
3894     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3895 #endif
3896   }
3897 #if defined(PETSC_USE_DEBUG)
3898   for (ii=0; ii<baij->i[m]; ii++) {
3899     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3900     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3901   }
3902 #endif
3903 
3904   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3905   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908