xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 42eaa7f3b58bc3af4b9ef28b03ccaa8b4c1282ac)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
49   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
50   PetscReal      shift = 0.0;
51 
52   PetscFunctionBegin;
53   if (a->idiagvalid) PetscFunctionReturn(0);
54   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
55   diag_offset = a->diag;
56   if (!a->idiag) {
57     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
58     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs2*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     case 7:
129       for (i=0; i<mbs; i++) {
130         odiag = v + 49*diag_offset[i];
131         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
132         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
133 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
134 	diag  += 49;
135 	mdiag += 49;
136       }
137       break;
138     default:
139       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
140       for (i=0; i<mbs; i++) {
141         odiag = v + bs2*diag_offset[i];
142         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
143         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
144         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
145 	diag  += bs2;
146 	mdiag += bs2;
147       }
148       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
149   }
150   a->idiagvalid = PETSC_TRUE;
151   PetscFunctionReturn(0);
152 }
153 EXTERN_C_END
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
157 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
158 {
159   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
160   PetscScalar        *x,x1,s1;
161   const PetscScalar  *b;
162   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
163   PetscErrorCode     ierr;
164   PetscInt           m = a->mbs,i,i2,nz,j;
165   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
166 
167   PetscFunctionBegin;
168   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
169   its = its*lits;
170   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
171   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
172   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
173   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
174   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
175 
176   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
177 
178   diag  = a->diag;
179   idiag = a->idiag;
180   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
181   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
182 
183   if (flag & SOR_ZERO_INITIAL_GUESS) {
184     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
185       x[0] = b[0]*idiag[0];
186       i2     = 1;
187       idiag += 1;
188       for (i=1; i<m; i++) {
189         v     = aa + ai[i];
190         vi    = aj + ai[i];
191         nz    = diag[i] - ai[i];
192         s1    = b[i2];
193         for (j=0; j<nz; j++) {
194           s1 -= v[j]*x[vi[j]];
195         }
196         x[i2]   = idiag[0]*s1;
197         idiag   += 1;
198         i2      += 1;
199       }
200       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
201       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
202     }
203     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
204         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
205       i2    = 0;
206       mdiag = a->idiag+a->mbs;
207       for (i=0; i<m; i++) {
208         x1      = x[i2];
209         x[i2]   = mdiag[0]*x1;
210         mdiag  += 1;
211         i2     += 1;
212       }
213       ierr = PetscLogFlops(m);CHKERRQ(ierr);
214     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
215       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
216     }
217     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
218       idiag   = a->idiag+a->mbs - 1;
219       i2      = m - 1;
220       x1      = x[i2];
221       x[i2]   = idiag[0]*x1;
222       idiag -= 1;
223       i2    -= 1;
224       for (i=m-2; i>=0; i--) {
225         v     = aa + (diag[i]+1);
226         vi    = aj + diag[i] + 1;
227         nz    = ai[i+1] - diag[i] - 1;
228         s1    = x[i2];
229         for (j=0; j<nz; j++) {
230           s1 -= v[j]*x[vi[j]];
231         }
232         x[i2]   = idiag[0]*s1;
233         idiag   -= 1;
234         i2      -= 1;
235       }
236       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
237     }
238   } else {
239     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
240   }
241   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
248 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
249 {
250   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
251   PetscScalar        *x,x1,x2,s1,s2;
252   const PetscScalar  *b;
253   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
254   PetscErrorCode     ierr;
255   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
256   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
257 
258   PetscFunctionBegin;
259   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
260   its = its*lits;
261   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
262   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
263   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
264   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
265   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
266 
267   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
268 
269   diag  = a->diag;
270   idiag = a->idiag;
271   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
272   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
273 
274   if (flag & SOR_ZERO_INITIAL_GUESS) {
275     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
276       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
277       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
278       i2     = 2;
279       idiag += 4;
280       for (i=1; i<m; i++) {
281 	v     = aa + 4*ai[i];
282 	vi    = aj + ai[i];
283 	nz    = diag[i] - ai[i];
284 	s1    = b[i2]; s2 = b[i2+1];
285         for (j=0; j<nz; j++) {
286 	  idx  = 2*vi[j];
287           it   = 4*j;
288 	  x1   = x[idx]; x2 = x[1+idx];
289 	  s1  -= v[it]*x1 + v[it+2]*x2;
290 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
291 	}
292 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
293 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
294         idiag   += 4;
295         i2      += 2;
296       }
297       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
298       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
299     }
300     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
301         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
302       i2    = 0;
303       mdiag = a->idiag+4*a->mbs;
304       for (i=0; i<m; i++) {
305         x1      = x[i2]; x2 = x[i2+1];
306         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
307         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
308         mdiag  += 4;
309         i2     += 2;
310       }
311       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
312     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
313       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
314     }
315     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
316       idiag   = a->idiag+4*a->mbs - 4;
317       i2      = 2*m - 2;
318       x1      = x[i2]; x2 = x[i2+1];
319       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
320       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
321       idiag -= 4;
322       i2    -= 2;
323       for (i=m-2; i>=0; i--) {
324 	v     = aa + 4*(diag[i]+1);
325 	vi    = aj + diag[i] + 1;
326 	nz    = ai[i+1] - diag[i] - 1;
327 	s1    = x[i2]; s2 = x[i2+1];
328         for (j=0; j<nz; j++) {
329  	  idx  = 2*vi[j];
330           it   = 4*j;
331 	  x1   = x[idx]; x2 = x[1+idx];
332 	  s1  -= v[it]*x1 + v[it+2]*x2;
333 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
334 	}
335 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
336 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
337         idiag   -= 4;
338         i2      -= 2;
339       }
340       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
341     }
342   } else {
343     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
344   }
345   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
346   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
352 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
353 {
354   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
355   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
356   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
357   const PetscScalar  *b;
358   PetscErrorCode     ierr;
359   PetscInt           m = a->mbs,i,i2,nz,idx;
360   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
361 
362   PetscFunctionBegin;
363   its = its*lits;
364   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
365   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
366   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
367   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
368   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
369   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
370 
371   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
372 
373   diag  = a->diag;
374   idiag = a->idiag;
375   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
376   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
377 
378   if (flag & SOR_ZERO_INITIAL_GUESS) {
379     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
380       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
381       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
382       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
383       i2     = 3;
384       idiag += 9;
385       for (i=1; i<m; i++) {
386         v     = aa + 9*ai[i];
387         vi    = aj + ai[i];
388         nz    = diag[i] - ai[i];
389         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
390         while (nz--) {
391           idx  = 3*(*vi++);
392           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
393           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
394           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
395           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
396           v   += 9;
397         }
398         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
399         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
400         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
401         idiag   += 9;
402         i2      += 3;
403       }
404       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
405       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
406     }
407     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
408         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
409       i2    = 0;
410       mdiag = a->idiag+9*a->mbs;
411       for (i=0; i<m; i++) {
412         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
413         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
414         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
415         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
416         mdiag  += 9;
417         i2     += 3;
418       }
419       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
420     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
421       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
422     }
423     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
424       idiag   = a->idiag+9*a->mbs - 9;
425       i2      = 3*m - 3;
426       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
427       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
428       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
429       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
430       idiag -= 9;
431       i2    -= 3;
432       for (i=m-2; i>=0; i--) {
433         v     = aa + 9*(diag[i]+1);
434         vi    = aj + diag[i] + 1;
435         nz    = ai[i+1] - diag[i] - 1;
436         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
437         while (nz--) {
438           idx  = 3*(*vi++);
439           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
440           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
441           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
442           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
443           v   += 9;
444         }
445         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
446         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
447         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
448         idiag   -= 9;
449         i2      -= 3;
450       }
451       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
452     }
453   } else {
454     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
455   }
456   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
457   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
463 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
464 {
465   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
466   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
467   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
468   const PetscScalar  *b;
469   PetscErrorCode     ierr;
470   PetscInt           m = a->mbs,i,i2,nz,idx;
471   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
472 
473   PetscFunctionBegin;
474   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
475   its = its*lits;
476   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
477   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
478   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
479   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
480   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
481 
482   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
483 
484   diag  = a->diag;
485   idiag = a->idiag;
486   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
487   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
488 
489   if (flag & SOR_ZERO_INITIAL_GUESS) {
490     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
491       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
492       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
493       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
494       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
495       i2     = 4;
496       idiag += 16;
497       for (i=1; i<m; i++) {
498 	v     = aa + 16*ai[i];
499 	vi    = aj + ai[i];
500 	nz    = diag[i] - ai[i];
501 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
502 	while (nz--) {
503 	  idx  = 4*(*vi++);
504 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
505 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
506 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
507 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
508 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
509 	  v   += 16;
510 	}
511 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
512 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
513 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
514 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
515         idiag   += 16;
516         i2      += 4;
517       }
518       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
519       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
520     }
521     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
522         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
523       i2    = 0;
524       mdiag = a->idiag+16*a->mbs;
525       for (i=0; i<m; i++) {
526         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
527         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
528         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
529         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
530         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
531         mdiag  += 16;
532         i2     += 4;
533       }
534       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
535     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
536       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
537     }
538     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
539       idiag   = a->idiag+16*a->mbs - 16;
540       i2      = 4*m - 4;
541       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
542       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
543       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
544       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
545       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
546       idiag -= 16;
547       i2    -= 4;
548       for (i=m-2; i>=0; i--) {
549 	v     = aa + 16*(diag[i]+1);
550 	vi    = aj + diag[i] + 1;
551 	nz    = ai[i+1] - diag[i] - 1;
552 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
553 	while (nz--) {
554 	  idx  = 4*(*vi++);
555 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
556 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
557 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
558 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
559 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
560 	  v   += 16;
561 	}
562 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
563 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
564 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
565 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
566         idiag   -= 16;
567         i2      -= 4;
568       }
569       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
570     }
571   } else {
572     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
573   }
574   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
575   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
581 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
582 {
583   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
584   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
585   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
586   const PetscScalar  *b;
587   PetscErrorCode     ierr;
588   PetscInt           m = a->mbs,i,i2,nz,idx;
589   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
590 
591   PetscFunctionBegin;
592   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
593   its = its*lits;
594   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
595   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
596   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
597   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
598   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
599 
600   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
601 
602   diag  = a->diag;
603   idiag = a->idiag;
604   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
605   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
606 
607   if (flag & SOR_ZERO_INITIAL_GUESS) {
608     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
609       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
610       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
611       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
612       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
613       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
614       i2     = 5;
615       idiag += 25;
616       for (i=1; i<m; i++) {
617 	v     = aa + 25*ai[i];
618 	vi    = aj + ai[i];
619 	nz    = diag[i] - ai[i];
620 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
621 	while (nz--) {
622 	  idx  = 5*(*vi++);
623 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
624 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
625 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
626 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
627 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
628 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
629 	  v   += 25;
630 	}
631 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
632 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
633 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
634 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
635 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
636         idiag   += 25;
637         i2      += 5;
638       }
639       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
640       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
641     }
642     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
643         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
644       i2    = 0;
645       mdiag = a->idiag+25*a->mbs;
646       for (i=0; i<m; i++) {
647         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
648         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
649         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
650         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
651         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
652         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
653         mdiag  += 25;
654         i2     += 5;
655       }
656       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
657     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
658       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
659     }
660     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
661       idiag   = a->idiag+25*a->mbs - 25;
662       i2      = 5*m - 5;
663       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
664       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
665       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
666       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
667       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
668       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
669       idiag -= 25;
670       i2    -= 5;
671       for (i=m-2; i>=0; i--) {
672 	v     = aa + 25*(diag[i]+1);
673 	vi    = aj + diag[i] + 1;
674 	nz    = ai[i+1] - diag[i] - 1;
675 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
676 	while (nz--) {
677 	  idx  = 5*(*vi++);
678 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
679 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
680 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
681 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
682 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
683 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
684 	  v   += 25;
685 	}
686 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
687 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
688 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
689 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
690 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
691         idiag   -= 25;
692         i2      -= 5;
693       }
694       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
695     }
696   } else {
697     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
698   }
699   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
700   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
706 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
707 {
708   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
709   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
710   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
711   const PetscScalar  *b;
712   PetscErrorCode     ierr;
713   PetscInt           m = a->mbs,i,i2,nz,idx;
714   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
715 
716   PetscFunctionBegin;
717   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
718   its = its*lits;
719   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
720   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
721   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
722   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
723   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
724 
725   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
726 
727   diag  = a->diag;
728   idiag = a->idiag;
729   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
730   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
731 
732   if (flag & SOR_ZERO_INITIAL_GUESS) {
733     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
734       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
735       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
736       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
737       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
738       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
739       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
740       i2     = 6;
741       idiag += 36;
742       for (i=1; i<m; i++) {
743         v     = aa + 36*ai[i];
744         vi    = aj + ai[i];
745         nz    = diag[i] - ai[i];
746         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
747         while (nz--) {
748           idx  = 6*(*vi++);
749           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
750           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
751           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
752           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
753           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
754           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
755           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
756           v   += 36;
757         }
758         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
759         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
760         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
761         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
762         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
763         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
764         idiag   += 36;
765         i2      += 6;
766       }
767       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
768       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
769     }
770     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
771         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
772       i2    = 0;
773       mdiag = a->idiag+36*a->mbs;
774       for (i=0; i<m; i++) {
775         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
776         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
777         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
778         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
779         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
780         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
781         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
782         mdiag  += 36;
783         i2     += 6;
784       }
785       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
786     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
787       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
788     }
789     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
790       idiag   = a->idiag+36*a->mbs - 36;
791       i2      = 6*m - 6;
792       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
793       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
794       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
795       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
796       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
797       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
798       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
799       idiag -= 36;
800       i2    -= 6;
801       for (i=m-2; i>=0; i--) {
802         v     = aa + 36*(diag[i]+1);
803         vi    = aj + diag[i] + 1;
804         nz    = ai[i+1] - diag[i] - 1;
805         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
806         while (nz--) {
807           idx  = 6*(*vi++);
808           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
809           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
810           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
811           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
812           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
813           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
814           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
815           v   += 36;
816         }
817         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
818         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
819         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
820         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
821         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
822         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
823         idiag   -= 36;
824         i2      -= 6;
825       }
826       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
827     }
828   } else {
829     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
830   }
831   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
832   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
838 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
839 {
840   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
841   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
842   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
843   const PetscScalar  *b;
844   PetscErrorCode     ierr;
845   PetscInt           m = a->mbs,i,i2,nz,idx;
846   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
847 
848   PetscFunctionBegin;
849   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
850   its = its*lits;
851   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
852   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
853   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
854   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
855   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
856 
857   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
858 
859   diag  = a->diag;
860   idiag = a->idiag;
861   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
862   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
863 
864   if (flag & SOR_ZERO_INITIAL_GUESS) {
865     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
866       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
867       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
868       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
869       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
870       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
871       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
872       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
873       i2     = 7;
874       idiag += 49;
875       for (i=1; i<m; i++) {
876         v     = aa + 49*ai[i];
877         vi    = aj + ai[i];
878         nz    = diag[i] - ai[i];
879         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
880         while (nz--) {
881           idx  = 7*(*vi++);
882           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
883           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
884           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
885           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
886           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
887           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
888           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
889           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
890           v   += 49;
891         }
892         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
893         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
894         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
895         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
896         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
897         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
898         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
899         idiag   += 49;
900         i2      += 7;
901       }
902       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
903       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
904     }
905     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
906         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
907       i2    = 0;
908       mdiag = a->idiag+49*a->mbs;
909       for (i=0; i<m; i++) {
910         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
911         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
912         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
913         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
914         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
915         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
916         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
917         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
918         mdiag  += 49;
919         i2     += 7;
920       }
921       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
922     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
923       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
924     }
925     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
926       idiag   = a->idiag+49*a->mbs - 49;
927       i2      = 7*m - 7;
928       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
929       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
930       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
931       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
932       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
933       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
934       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
935       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
936       idiag -= 49;
937       i2    -= 7;
938       for (i=m-2; i>=0; i--) {
939         v     = aa + 49*(diag[i]+1);
940         vi    = aj + diag[i] + 1;
941         nz    = ai[i+1] - diag[i] - 1;
942         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
943         while (nz--) {
944           idx  = 7*(*vi++);
945           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
946           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
947           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
948           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
949           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
950           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
951           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
952           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
953           v   += 49;
954         }
955         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
956         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
957         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
958         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
959         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
960         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
961         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
962         idiag   -= 49;
963         i2      -= 7;
964       }
965       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
966     }
967   } else {
968     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
969   }
970   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
977 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
978 {
979   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
980   PetscScalar        *x,*work,*w,*workt;
981   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
982   const PetscScalar  *b;
983   PetscErrorCode     ierr;
984   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
985   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
986 
987   PetscFunctionBegin;
988   its = its*lits;
989   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
990   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
991   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
992   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
993   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
994   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
995 
996   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
997 
998   diag  = a->diag;
999   idiag = a->idiag;
1000   if (!a->mult_work) {
1001     k    = PetscMax(A->rmap->n,A->cmap->n);
1002     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1003   }
1004   work = a->mult_work;
1005   if (!a->sor_work) {
1006     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1007   }
1008   w = a->sor_work;
1009 
1010   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1011   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1012 
1013   if (flag & SOR_ZERO_INITIAL_GUESS) {
1014     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1015       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1016       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1017       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1018       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1019       i2     = bs;
1020       idiag += bs2;
1021       for (i=1; i<m; i++) {
1022         v     = aa + bs2*ai[i];
1023         vi    = aj + ai[i];
1024         nz    = diag[i] - ai[i];
1025 
1026         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1027 	/* copy all rows of x that are needed into contiguous space */
1028         workt = work;
1029         for (j=0; j<nz; j++) {
1030           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1031           workt += bs;
1032         }
1033         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1034        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1035         while (nz--) {
1036           idx  = N*(*vi++);
1037           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1038           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1039           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1040           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1041           v   += N2;
1042 	  } */
1043 
1044         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1045         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1046         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1047         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1048 
1049         idiag   += bs2;
1050         i2      += bs;
1051       }
1052       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1053       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1054     }
1055     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1056         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1057       i2    = 0;
1058       mdiag = a->idiag+bs2*a->mbs;
1059       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1060       for (i=0; i<m; i++) {
1061         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1062         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1063         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1064         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1065         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1066 
1067         mdiag  += bs2;
1068         i2     += bs;
1069       }
1070       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1071     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1072       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1073     }
1074     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1075       idiag   = a->idiag+bs2*a->mbs - bs2;
1076       i2      = bs*m - bs;
1077       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1078       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1079       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1080       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1081       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1082       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1083       idiag -= bs2;
1084       i2    -= bs;
1085       for (i=m-2; i>=0; i--) {
1086         v     = aa + bs2*(diag[i]+1);
1087         vi    = aj + diag[i] + 1;
1088         nz    = ai[i+1] - diag[i] - 1;
1089 
1090         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1091 	/* copy all rows of x that are needed into contiguous space */
1092         workt = work;
1093         for (j=0; j<nz; j++) {
1094           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1095           workt += bs;
1096         }
1097         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1098         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1099         while (nz--) {
1100           idx  = N*(*vi++);
1101           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1102           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1103           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1104           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1105           v   += N2;
1106 	  } */
1107 
1108         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1109         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1110         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1111         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1112         idiag   -= bs2;
1113         i2      -= bs;
1114       }
1115       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1116     }
1117   } else {
1118     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1119   }
1120   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1121   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*
1126     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1127 */
1128 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1129 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1131 #define matsetvaluesblocked4_ matsetvaluesblocked4
1132 #endif
1133 
1134 EXTERN_C_BEGIN
1135 #undef __FUNCT__
1136 #define __FUNCT__ "matsetvaluesblocked4_"
1137 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1138 {
1139   Mat               A = *AA;
1140   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1141   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1142   PetscInt          *ai=a->i,*ailen=a->ilen;
1143   PetscInt          *aj=a->j,stepval,lastcol = -1;
1144   const PetscScalar *value = v;
1145   MatScalar         *ap,*aa = a->a,*bap;
1146 
1147   PetscFunctionBegin;
1148   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1149   stepval = (n-1)*4;
1150   for (k=0; k<m; k++) { /* loop over added rows */
1151     row  = im[k];
1152     rp   = aj + ai[row];
1153     ap   = aa + 16*ai[row];
1154     nrow = ailen[row];
1155     low  = 0;
1156     high = nrow;
1157     for (l=0; l<n; l++) { /* loop over added columns */
1158       col = in[l];
1159       if (col <= lastcol) low = 0; else high = nrow;
1160       lastcol = col;
1161       value = v + k*(stepval+4 + l)*4;
1162       while (high-low > 7) {
1163         t = (low+high)/2;
1164         if (rp[t] > col) high = t;
1165         else             low  = t;
1166       }
1167       for (i=low; i<high; i++) {
1168         if (rp[i] > col) break;
1169         if (rp[i] == col) {
1170           bap  = ap +  16*i;
1171           for (ii=0; ii<4; ii++,value+=stepval) {
1172             for (jj=ii; jj<16; jj+=4) {
1173               bap[jj] += *value++;
1174             }
1175           }
1176           goto noinsert2;
1177         }
1178       }
1179       N = nrow++ - 1;
1180       high++; /* added new column index thus must search to one higher than before */
1181       /* shift up all the later entries in this row */
1182       for (ii=N; ii>=i; ii--) {
1183         rp[ii+1] = rp[ii];
1184         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1185       }
1186       if (N >= i) {
1187         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1188       }
1189       rp[i] = col;
1190       bap   = ap +  16*i;
1191       for (ii=0; ii<4; ii++,value+=stepval) {
1192         for (jj=ii; jj<16; jj+=4) {
1193           bap[jj] = *value++;
1194         }
1195       }
1196       noinsert2:;
1197       low = i;
1198     }
1199     ailen[row] = nrow;
1200   }
1201   PetscFunctionReturnVoid();
1202 }
1203 EXTERN_C_END
1204 
1205 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1206 #define matsetvalues4_ MATSETVALUES4
1207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1208 #define matsetvalues4_ matsetvalues4
1209 #endif
1210 
1211 EXTERN_C_BEGIN
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatSetValues4_"
1214 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1215 {
1216   Mat         A = *AA;
1217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1218   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1219   PetscInt    *ai=a->i,*ailen=a->ilen;
1220   PetscInt    *aj=a->j,brow,bcol;
1221   PetscInt    ridx,cidx,lastcol = -1;
1222   MatScalar   *ap,value,*aa=a->a,*bap;
1223 
1224   PetscFunctionBegin;
1225   for (k=0; k<m; k++) { /* loop over added rows */
1226     row  = im[k]; brow = row/4;
1227     rp   = aj + ai[brow];
1228     ap   = aa + 16*ai[brow];
1229     nrow = ailen[brow];
1230     low  = 0;
1231     high = nrow;
1232     for (l=0; l<n; l++) { /* loop over added columns */
1233       col = in[l]; bcol = col/4;
1234       ridx = row % 4; cidx = col % 4;
1235       value = v[l + k*n];
1236       if (col <= lastcol) low = 0; else high = nrow;
1237       lastcol = col;
1238       while (high-low > 7) {
1239         t = (low+high)/2;
1240         if (rp[t] > bcol) high = t;
1241         else              low  = t;
1242       }
1243       for (i=low; i<high; i++) {
1244         if (rp[i] > bcol) break;
1245         if (rp[i] == bcol) {
1246           bap  = ap +  16*i + 4*cidx + ridx;
1247           *bap += value;
1248           goto noinsert1;
1249         }
1250       }
1251       N = nrow++ - 1;
1252       high++; /* added new column thus must search to one higher than before */
1253       /* shift up all the later entries in this row */
1254       for (ii=N; ii>=i; ii--) {
1255         rp[ii+1] = rp[ii];
1256         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1257       }
1258       if (N>=i) {
1259         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1260       }
1261       rp[i]                    = bcol;
1262       ap[16*i + 4*cidx + ridx] = value;
1263       noinsert1:;
1264       low = i;
1265     }
1266     ailen[brow] = nrow;
1267   }
1268   PetscFunctionReturnVoid();
1269 }
1270 EXTERN_C_END
1271 
1272 /*
1273      Checks for missing diagonals
1274 */
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1277 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1278 {
1279   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1280   PetscErrorCode ierr;
1281   PetscInt       *diag,*jj = a->j,i;
1282 
1283   PetscFunctionBegin;
1284   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1285   *missing = PETSC_FALSE;
1286   if (A->rmap->n > 0 && !jj) {
1287     *missing  = PETSC_TRUE;
1288     if (d) *d = 0;
1289     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1290   } else {
1291     diag     = a->diag;
1292     for (i=0; i<a->mbs; i++) {
1293       if (jj[diag[i]] != i) {
1294         *missing  = PETSC_TRUE;
1295         if (d) *d = i;
1296         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1297       }
1298     }
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1305 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1306 {
1307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       i,j,m = a->mbs;
1310 
1311   PetscFunctionBegin;
1312   if (!a->diag) {
1313     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1314     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1315     a->free_diag = PETSC_TRUE;
1316   }
1317   for (i=0; i<m; i++) {
1318     a->diag[i] = a->i[i+1];
1319     for (j=a->i[i]; j<a->i[i+1]; j++) {
1320       if (a->j[j] == i) {
1321         a->diag[i] = j;
1322         break;
1323       }
1324     }
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 
1330 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1334 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1335 {
1336   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1337   PetscErrorCode ierr;
1338   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1339   PetscInt       *tia, *tja;
1340 
1341   PetscFunctionBegin;
1342   *nn = n;
1343   if (!ia) PetscFunctionReturn(0);
1344   if (symmetric) {
1345     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1346     nz   = tia[n];
1347   } else {
1348     tia = a->i; tja = a->j;
1349   }
1350 
1351   if (!blockcompressed && bs > 1) {
1352     (*nn) *= bs;
1353     /* malloc & create the natural set of indices */
1354     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1355     if (n) {
1356       (*ia)[0] = 0;
1357       for (j=1; j<bs; j++) {
1358         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1359       }
1360     }
1361 
1362     for (i=1; i<n; i++) {
1363       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1364       for (j=1; j<bs; j++) {
1365         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1366       }
1367     }
1368     if (n) {
1369       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1370     }
1371 
1372     if (ja) {
1373       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1374       cnt = 0;
1375       for (i=0; i<n; i++) {
1376         for (j=0; j<bs; j++) {
1377           for (k=tia[i]; k<tia[i+1]; k++) {
1378             for (l=0; l<bs; l++) {
1379               (*ja)[cnt++] = bs*tja[k] + l;
1380 	    }
1381           }
1382         }
1383       }
1384     }
1385 
1386     n     *= bs;
1387     nz *= bs*bs;
1388     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1389       ierr = PetscFree(tia);CHKERRQ(ierr);
1390       ierr = PetscFree(tja);CHKERRQ(ierr);
1391     }
1392   } else if (oshift == 1) {
1393     if (symmetric) {
1394       PetscInt nz = tia[A->rmap->n/bs];
1395       /*  add 1 to i and j indices */
1396       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1397       *ia = tia;
1398       if (ja) {
1399 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1400         *ja = tja;
1401       }
1402     } else {
1403       PetscInt nz = a->i[A->rmap->n/bs];
1404       /* malloc space and  add 1 to i and j indices */
1405       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1406       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1407       if (ja) {
1408 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1409 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1410       }
1411     }
1412   } else {
1413     *ia = tia;
1414     if (ja) *ja = tja;
1415   }
1416 
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1422 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1423 {
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   if (!ia) PetscFunctionReturn(0);
1428   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1429     ierr = PetscFree(*ia);CHKERRQ(ierr);
1430     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1437 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441 
1442   PetscFunctionBegin;
1443 #if defined(PETSC_USE_LOG)
1444   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1445 #endif
1446   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1447   if (a->row) {
1448     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1449   }
1450   if (a->col) {
1451     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1452   }
1453   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1454   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1455   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1456   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1457   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1458   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1459   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1460   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1461   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1462   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1463 
1464   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1465   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1466   ierr = PetscFree(a);CHKERRQ(ierr);
1467 
1468   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1471   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1472   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1473   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1474   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1475   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1476   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1482 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1483 {
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   switch (op) {
1489   case MAT_ROW_ORIENTED:
1490     a->roworiented    = flg;
1491     break;
1492   case MAT_KEEP_NONZERO_PATTERN:
1493     a->keepnonzeropattern = flg;
1494     break;
1495   case MAT_NEW_NONZERO_LOCATIONS:
1496     a->nonew          = (flg ? 0 : 1);
1497     break;
1498   case MAT_NEW_NONZERO_LOCATION_ERR:
1499     a->nonew          = (flg ? -1 : 0);
1500     break;
1501   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1502     a->nonew          = (flg ? -2 : 0);
1503     break;
1504   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1505     a->nounused       = (flg ? -1 : 0);
1506     break;
1507   case MAT_NEW_DIAGONALS:
1508   case MAT_IGNORE_OFF_PROC_ENTRIES:
1509   case MAT_USE_HASH_TABLE:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   case MAT_SYMMETRIC:
1513   case MAT_STRUCTURALLY_SYMMETRIC:
1514   case MAT_HERMITIAN:
1515   case MAT_SYMMETRY_ETERNAL:
1516     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1517     break;
1518   default:
1519     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1526 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1527 {
1528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1529   PetscErrorCode ierr;
1530   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1531   MatScalar      *aa,*aa_i;
1532   PetscScalar    *v_i;
1533 
1534   PetscFunctionBegin;
1535   bs  = A->rmap->bs;
1536   ai  = a->i;
1537   aj  = a->j;
1538   aa  = a->a;
1539   bs2 = a->bs2;
1540 
1541   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1542 
1543   bn  = row/bs;   /* Block number */
1544   bp  = row % bs; /* Block Position */
1545   M   = ai[bn+1] - ai[bn];
1546   *nz = bs*M;
1547 
1548   if (v) {
1549     *v = 0;
1550     if (*nz) {
1551       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1552       for (i=0; i<M; i++) { /* for each block in the block row */
1553         v_i  = *v + i*bs;
1554         aa_i = aa + bs2*(ai[bn] + i);
1555         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1556       }
1557     }
1558   }
1559 
1560   if (idx) {
1561     *idx = 0;
1562     if (*nz) {
1563       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1564       for (i=0; i<M; i++) { /* for each block in the block row */
1565         idx_i = *idx + i*bs;
1566         itmp  = bs*aj[ai[bn] + i];
1567         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1568       }
1569     }
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1576 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1577 {
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1582   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1590 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1591 {
1592   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1593   Mat            C;
1594   PetscErrorCode ierr;
1595   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1596   PetscInt       *rows,*cols,bs2=a->bs2;
1597   MatScalar      *array;
1598 
1599   PetscFunctionBegin;
1600   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1601   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1602     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1603     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1604 
1605     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1606     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1607     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1608     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1609     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1610     ierr = PetscFree(col);CHKERRQ(ierr);
1611   } else {
1612     C = *B;
1613   }
1614 
1615   array = a->a;
1616   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1617   for (i=0; i<mbs; i++) {
1618     cols[0] = i*bs;
1619     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1620     len = ai[i+1] - ai[i];
1621     for (j=0; j<len; j++) {
1622       rows[0] = (*aj++)*bs;
1623       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1624       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1625       array += bs2;
1626     }
1627   }
1628   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1629 
1630   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1631   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632 
1633   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1634     *B = C;
1635   } else {
1636     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1643 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1644 {
1645   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1646   PetscErrorCode ierr;
1647   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1648   int            fd;
1649   PetscScalar    *aa;
1650   FILE           *file;
1651 
1652   PetscFunctionBegin;
1653   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1654   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1655   col_lens[0] = MAT_FILE_CLASSID;
1656 
1657   col_lens[1] = A->rmap->N;
1658   col_lens[2] = A->cmap->n;
1659   col_lens[3] = a->nz*bs2;
1660 
1661   /* store lengths of each row and write (including header) to file */
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1666     }
1667   }
1668   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1669   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1670 
1671   /* store column indices (zero start index) */
1672   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1673   count = 0;
1674   for (i=0; i<a->mbs; i++) {
1675     for (j=0; j<bs; j++) {
1676       for (k=a->i[i]; k<a->i[i+1]; k++) {
1677         for (l=0; l<bs; l++) {
1678           jj[count++] = bs*a->j[k] + l;
1679         }
1680       }
1681     }
1682   }
1683   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1684   ierr = PetscFree(jj);CHKERRQ(ierr);
1685 
1686   /* store nonzero values */
1687   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1688   count = 0;
1689   for (i=0; i<a->mbs; i++) {
1690     for (j=0; j<bs; j++) {
1691       for (k=a->i[i]; k<a->i[i+1]; k++) {
1692         for (l=0; l<bs; l++) {
1693           aa[count++] = a->a[bs2*k + l*bs + j];
1694         }
1695       }
1696     }
1697   }
1698   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1699   ierr = PetscFree(aa);CHKERRQ(ierr);
1700 
1701   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1702   if (file) {
1703     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1710 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1711 {
1712   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1713   PetscErrorCode    ierr;
1714   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1715   PetscViewerFormat format;
1716 
1717   PetscFunctionBegin;
1718   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1719   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1720     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1721   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1722     Mat aij;
1723     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1724     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1725     ierr = MatDestroy(aij);CHKERRQ(ierr);
1726   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1727      PetscFunctionReturn(0);
1728   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1729     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1730     for (i=0; i<a->mbs; i++) {
1731       for (j=0; j<bs; j++) {
1732         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1733         for (k=a->i[i]; k<a->i[i+1]; k++) {
1734           for (l=0; l<bs; l++) {
1735 #if defined(PETSC_USE_COMPLEX)
1736             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1738                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1739             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1741                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1742             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1743               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1744             }
1745 #else
1746             if (a->a[bs2*k + l*bs + j] != 0.0) {
1747               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1748             }
1749 #endif
1750           }
1751         }
1752         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1753       }
1754     }
1755     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1756   } else {
1757     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1758     for (i=0; i<a->mbs; i++) {
1759       for (j=0; j<bs; j++) {
1760         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1761         for (k=a->i[i]; k<a->i[i+1]; k++) {
1762           for (l=0; l<bs; l++) {
1763 #if defined(PETSC_USE_COMPLEX)
1764             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1765               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1766                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1767             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1768               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1769                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1770             } else {
1771               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1772             }
1773 #else
1774             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1775 #endif
1776           }
1777         }
1778         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1779       }
1780     }
1781     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1782   }
1783   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1789 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1790 {
1791   Mat            A = (Mat) Aa;
1792   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1793   PetscErrorCode ierr;
1794   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1795   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1796   MatScalar      *aa;
1797   PetscViewer    viewer;
1798 
1799   PetscFunctionBegin;
1800 
1801   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1802   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1803 
1804   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1805 
1806   /* loop over matrix elements drawing boxes */
1807   color = PETSC_DRAW_BLUE;
1808   for (i=0,row=0; i<mbs; i++,row+=bs) {
1809     for (j=a->i[i]; j<a->i[i+1]; j++) {
1810       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1811       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1812       aa = a->a + j*bs2;
1813       for (k=0; k<bs; k++) {
1814         for (l=0; l<bs; l++) {
1815           if (PetscRealPart(*aa++) >=  0.) continue;
1816           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1817         }
1818       }
1819     }
1820   }
1821   color = PETSC_DRAW_CYAN;
1822   for (i=0,row=0; i<mbs; i++,row+=bs) {
1823     for (j=a->i[i]; j<a->i[i+1]; j++) {
1824       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1825       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1826       aa = a->a + j*bs2;
1827       for (k=0; k<bs; k++) {
1828         for (l=0; l<bs; l++) {
1829           if (PetscRealPart(*aa++) != 0.) continue;
1830           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1831         }
1832       }
1833     }
1834   }
1835 
1836   color = PETSC_DRAW_RED;
1837   for (i=0,row=0; i<mbs; i++,row+=bs) {
1838     for (j=a->i[i]; j<a->i[i+1]; j++) {
1839       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1840       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1841       aa = a->a + j*bs2;
1842       for (k=0; k<bs; k++) {
1843         for (l=0; l<bs; l++) {
1844           if (PetscRealPart(*aa++) <= 0.) continue;
1845           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1846         }
1847       }
1848     }
1849   }
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1855 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1856 {
1857   PetscErrorCode ierr;
1858   PetscReal      xl,yl,xr,yr,w,h;
1859   PetscDraw      draw;
1860   PetscTruth     isnull;
1861 
1862   PetscFunctionBegin;
1863 
1864   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1865   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1866 
1867   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1868   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1869   xr += w;    yr += h;  xl = -w;     yl = -h;
1870   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1871   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1872   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatView_SeqBAIJ"
1878 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1879 {
1880   PetscErrorCode ierr;
1881   PetscTruth     iascii,isbinary,isdraw;
1882 
1883   PetscFunctionBegin;
1884   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1885   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1886   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1887   if (iascii){
1888     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1889   } else if (isbinary) {
1890     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1891   } else if (isdraw) {
1892     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1893   } else {
1894     Mat B;
1895     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1896     ierr = MatView(B,viewer);CHKERRQ(ierr);
1897     ierr = MatDestroy(B);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1905 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1906 {
1907   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1908   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1909   PetscInt    *ai = a->i,*ailen = a->ilen;
1910   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1911   MatScalar   *ap,*aa = a->a;
1912 
1913   PetscFunctionBegin;
1914   for (k=0; k<m; k++) { /* loop over rows */
1915     row  = im[k]; brow = row/bs;
1916     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1917     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1918     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1919     nrow = ailen[brow];
1920     for (l=0; l<n; l++) { /* loop over columns */
1921       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1922       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1923       col  = in[l] ;
1924       bcol = col/bs;
1925       cidx = col%bs;
1926       ridx = row%bs;
1927       high = nrow;
1928       low  = 0; /* assume unsorted */
1929       while (high-low > 5) {
1930         t = (low+high)/2;
1931         if (rp[t] > bcol) high = t;
1932         else             low  = t;
1933       }
1934       for (i=low; i<high; i++) {
1935         if (rp[i] > bcol) break;
1936         if (rp[i] == bcol) {
1937           *v++ = ap[bs2*i+bs*cidx+ridx];
1938           goto finished;
1939         }
1940       }
1941       *v++ = 0.0;
1942       finished:;
1943     }
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1950 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1951 {
1952   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1953   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1954   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1955   PetscErrorCode    ierr;
1956   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1957   PetscTruth        roworiented=a->roworiented;
1958   const PetscScalar *value = v;
1959   MatScalar         *ap,*aa = a->a,*bap;
1960 
1961   PetscFunctionBegin;
1962   if (roworiented) {
1963     stepval = (n-1)*bs;
1964   } else {
1965     stepval = (m-1)*bs;
1966   }
1967   for (k=0; k<m; k++) { /* loop over added rows */
1968     row  = im[k];
1969     if (row < 0) continue;
1970 #if defined(PETSC_USE_DEBUG)
1971     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1972 #endif
1973     rp   = aj + ai[row];
1974     ap   = aa + bs2*ai[row];
1975     rmax = imax[row];
1976     nrow = ailen[row];
1977     low  = 0;
1978     high = nrow;
1979     for (l=0; l<n; l++) { /* loop over added columns */
1980       if (in[l] < 0) continue;
1981 #if defined(PETSC_USE_DEBUG)
1982       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1983 #endif
1984       col = in[l];
1985       if (roworiented) {
1986         value = v + (k*(stepval+bs) + l)*bs;
1987       } else {
1988         value = v + (l*(stepval+bs) + k)*bs;
1989       }
1990       if (col <= lastcol) low = 0; else high = nrow;
1991       lastcol = col;
1992       while (high-low > 7) {
1993         t = (low+high)/2;
1994         if (rp[t] > col) high = t;
1995         else             low  = t;
1996       }
1997       for (i=low; i<high; i++) {
1998         if (rp[i] > col) break;
1999         if (rp[i] == col) {
2000           bap  = ap +  bs2*i;
2001           if (roworiented) {
2002             if (is == ADD_VALUES) {
2003               for (ii=0; ii<bs; ii++,value+=stepval) {
2004                 for (jj=ii; jj<bs2; jj+=bs) {
2005                   bap[jj] += *value++;
2006                 }
2007               }
2008             } else {
2009               for (ii=0; ii<bs; ii++,value+=stepval) {
2010                 for (jj=ii; jj<bs2; jj+=bs) {
2011                   bap[jj] = *value++;
2012                 }
2013               }
2014             }
2015           } else {
2016             if (is == ADD_VALUES) {
2017               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2018                 for (jj=0; jj<bs; jj++) {
2019                   bap[jj] += value[jj];
2020                 }
2021                 bap += bs;
2022               }
2023             } else {
2024               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2025                 for (jj=0; jj<bs; jj++) {
2026                   bap[jj]  = value[jj];
2027                 }
2028                 bap += bs;
2029               }
2030             }
2031           }
2032           goto noinsert2;
2033         }
2034       }
2035       if (nonew == 1) goto noinsert2;
2036       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2037       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2038       N = nrow++ - 1; high++;
2039       /* shift up all the later entries in this row */
2040       for (ii=N; ii>=i; ii--) {
2041         rp[ii+1] = rp[ii];
2042         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2043       }
2044       if (N >= i) {
2045         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2046       }
2047       rp[i] = col;
2048       bap   = ap +  bs2*i;
2049       if (roworiented) {
2050         for (ii=0; ii<bs; ii++,value+=stepval) {
2051           for (jj=ii; jj<bs2; jj+=bs) {
2052             bap[jj] = *value++;
2053           }
2054         }
2055       } else {
2056         for (ii=0; ii<bs; ii++,value+=stepval) {
2057           for (jj=0; jj<bs; jj++) {
2058             *bap++  = *value++;
2059           }
2060         }
2061       }
2062       noinsert2:;
2063       low = i;
2064     }
2065     ailen[row] = nrow;
2066   }
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2072 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2073 {
2074   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2075   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2076   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2077   PetscErrorCode ierr;
2078   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2079   MatScalar      *aa = a->a,*ap;
2080   PetscReal      ratio=0.6;
2081 
2082   PetscFunctionBegin;
2083   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2084 
2085   if (m) rmax = ailen[0];
2086   for (i=1; i<mbs; i++) {
2087     /* move each row back by the amount of empty slots (fshift) before it*/
2088     fshift += imax[i-1] - ailen[i-1];
2089     rmax   = PetscMax(rmax,ailen[i]);
2090     if (fshift) {
2091       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2092       N = ailen[i];
2093       for (j=0; j<N; j++) {
2094         ip[j-fshift] = ip[j];
2095         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2096       }
2097     }
2098     ai[i] = ai[i-1] + ailen[i-1];
2099   }
2100   if (mbs) {
2101     fshift += imax[mbs-1] - ailen[mbs-1];
2102     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2103   }
2104   /* reset ilen and imax for each row */
2105   for (i=0; i<mbs; i++) {
2106     ailen[i] = imax[i] = ai[i+1] - ai[i];
2107   }
2108   a->nz = ai[mbs];
2109 
2110   /* diagonals may have moved, so kill the diagonal pointers */
2111   a->idiagvalid = PETSC_FALSE;
2112   if (fshift && a->diag) {
2113     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2114     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2115     a->diag = 0;
2116   }
2117   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2118   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2119   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2120   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2121   A->info.mallocs     += a->reallocs;
2122   a->reallocs          = 0;
2123   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2124 
2125   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
2126   if (a->compressedrow.use){
2127     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2128   }
2129 
2130   A->same_nonzero = PETSC_TRUE;
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 /*
2135    This function returns an array of flags which indicate the locations of contiguous
2136    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2137    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2138    Assume: sizes should be long enough to hold all the values.
2139 */
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2142 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2143 {
2144   PetscInt   i,j,k,row;
2145   PetscTruth flg;
2146 
2147   PetscFunctionBegin;
2148   for (i=0,j=0; i<n; j++) {
2149     row = idx[i];
2150     if (row%bs!=0) { /* Not the begining of a block */
2151       sizes[j] = 1;
2152       i++;
2153     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2154       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2155       i++;
2156     } else { /* Begining of the block, so check if the complete block exists */
2157       flg = PETSC_TRUE;
2158       for (k=1; k<bs; k++) {
2159         if (row+k != idx[i+k]) { /* break in the block */
2160           flg = PETSC_FALSE;
2161           break;
2162         }
2163       }
2164       if (flg) { /* No break in the bs */
2165         sizes[j] = bs;
2166         i+= bs;
2167       } else {
2168         sizes[j] = 1;
2169         i++;
2170       }
2171     }
2172   }
2173   *bs_max = j;
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2179 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2180 {
2181   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2182   PetscErrorCode ierr;
2183   PetscInt       i,j,k,count,*rows;
2184   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2185   PetscScalar    zero = 0.0;
2186   MatScalar      *aa;
2187 
2188   PetscFunctionBegin;
2189   /* Make a copy of the IS and  sort it */
2190   /* allocate memory for rows,sizes */
2191   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2192 
2193   /* copy IS values to rows, and sort them */
2194   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2195   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2196   if (baij->keepnonzeropattern) {
2197     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2198     bs_max = is_n;
2199     A->same_nonzero = PETSC_TRUE;
2200   } else {
2201     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2202     A->same_nonzero = PETSC_FALSE;
2203   }
2204 
2205   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2206     row   = rows[j];
2207     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2208     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2209     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2210     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2211       if (diag != 0.0) {
2212         if (baij->ilen[row/bs] > 0) {
2213           baij->ilen[row/bs]       = 1;
2214           baij->j[baij->i[row/bs]] = row/bs;
2215           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2216         }
2217         /* Now insert all the diagonal values for this bs */
2218         for (k=0; k<bs; k++) {
2219           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2220         }
2221       } else { /* (diag == 0.0) */
2222         baij->ilen[row/bs] = 0;
2223       } /* end (diag == 0.0) */
2224     } else { /* (sizes[i] != bs) */
2225 #if defined (PETSC_USE_DEBUG)
2226       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2227 #endif
2228       for (k=0; k<count; k++) {
2229         aa[0] =  zero;
2230         aa    += bs;
2231       }
2232       if (diag != 0.0) {
2233         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2234       }
2235     }
2236   }
2237 
2238   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2239   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2245 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2246 {
2247   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2248   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2249   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2250   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2251   PetscErrorCode ierr;
2252   PetscInt       ridx,cidx,bs2=a->bs2;
2253   PetscTruth     roworiented=a->roworiented;
2254   MatScalar      *ap,value,*aa=a->a,*bap;
2255 
2256   PetscFunctionBegin;
2257   if (v) PetscValidScalarPointer(v,6);
2258   for (k=0; k<m; k++) { /* loop over added rows */
2259     row  = im[k];
2260     brow = row/bs;
2261     if (row < 0) continue;
2262 #if defined(PETSC_USE_DEBUG)
2263     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2264 #endif
2265     rp   = aj + ai[brow];
2266     ap   = aa + bs2*ai[brow];
2267     rmax = imax[brow];
2268     nrow = ailen[brow];
2269     low  = 0;
2270     high = nrow;
2271     for (l=0; l<n; l++) { /* loop over added columns */
2272       if (in[l] < 0) continue;
2273 #if defined(PETSC_USE_DEBUG)
2274       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2275 #endif
2276       col = in[l]; bcol = col/bs;
2277       ridx = row % bs; cidx = col % bs;
2278       if (roworiented) {
2279         value = v[l + k*n];
2280       } else {
2281         value = v[k + l*m];
2282       }
2283       if (col <= lastcol) low = 0; else high = nrow;
2284       lastcol = col;
2285       while (high-low > 7) {
2286         t = (low+high)/2;
2287         if (rp[t] > bcol) high = t;
2288         else              low  = t;
2289       }
2290       for (i=low; i<high; i++) {
2291         if (rp[i] > bcol) break;
2292         if (rp[i] == bcol) {
2293           bap  = ap +  bs2*i + bs*cidx + ridx;
2294           if (is == ADD_VALUES) *bap += value;
2295           else                  *bap  = value;
2296           goto noinsert1;
2297         }
2298       }
2299       if (nonew == 1) goto noinsert1;
2300       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2301       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2302       N = nrow++ - 1; high++;
2303       /* shift up all the later entries in this row */
2304       for (ii=N; ii>=i; ii--) {
2305         rp[ii+1] = rp[ii];
2306         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2307       }
2308       if (N>=i) {
2309         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2310       }
2311       rp[i]                      = bcol;
2312       ap[bs2*i + bs*cidx + ridx] = value;
2313       a->nz++;
2314       noinsert1:;
2315       low = i;
2316     }
2317     ailen[brow] = nrow;
2318   }
2319   A->same_nonzero = PETSC_FALSE;
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2325 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2326 {
2327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2328   Mat            outA;
2329   PetscErrorCode ierr;
2330   PetscTruth     row_identity,col_identity;
2331 
2332   PetscFunctionBegin;
2333   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2334   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2335   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2336   if (!row_identity || !col_identity) {
2337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2338   }
2339 
2340   outA            = inA;
2341   inA->factortype = MAT_FACTOR_LU;
2342 
2343   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2344 
2345   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2346   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2347   a->row = row;
2348   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2349   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2350   a->col = col;
2351 
2352   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2353   if (a->icol) {
2354     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2355   }
2356   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2357   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2358 
2359   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2360   if (!a->solve_work) {
2361     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2362     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2363   }
2364   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2365 
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 EXTERN_C_BEGIN
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2372 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2373 {
2374   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2375   PetscInt    i,nz,mbs;
2376 
2377   PetscFunctionBegin;
2378   nz  = baij->maxnz;
2379   mbs = baij->mbs;
2380   for (i=0; i<nz; i++) {
2381     baij->j[i] = indices[i];
2382   }
2383   baij->nz = nz;
2384   for (i=0; i<mbs; i++) {
2385     baij->ilen[i] = baij->imax[i];
2386   }
2387   PetscFunctionReturn(0);
2388 }
2389 EXTERN_C_END
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2393 /*@
2394     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2395        in the matrix.
2396 
2397   Input Parameters:
2398 +  mat - the SeqBAIJ matrix
2399 -  indices - the column indices
2400 
2401   Level: advanced
2402 
2403   Notes:
2404     This can be called if you have precomputed the nonzero structure of the
2405   matrix and want to provide it to the matrix object to improve the performance
2406   of the MatSetValues() operation.
2407 
2408     You MUST have set the correct numbers of nonzeros per row in the call to
2409   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2410 
2411     MUST be called before any calls to MatSetValues();
2412 
2413 @*/
2414 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2415 {
2416   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2417 
2418   PetscFunctionBegin;
2419   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2420   PetscValidPointer(indices,2);
2421   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2422   if (f) {
2423     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2424   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2430 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2431 {
2432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2433   PetscErrorCode ierr;
2434   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2435   PetscReal      atmp;
2436   PetscScalar    *x,zero = 0.0;
2437   MatScalar      *aa;
2438   PetscInt       ncols,brow,krow,kcol;
2439 
2440   PetscFunctionBegin;
2441   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2442   bs   = A->rmap->bs;
2443   aa   = a->a;
2444   ai   = a->i;
2445   aj   = a->j;
2446   mbs  = a->mbs;
2447 
2448   ierr = VecSet(v,zero);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2450   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2451   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2452   for (i=0; i<mbs; i++) {
2453     ncols = ai[1] - ai[0]; ai++;
2454     brow  = bs*i;
2455     for (j=0; j<ncols; j++){
2456       for (kcol=0; kcol<bs; kcol++){
2457         for (krow=0; krow<bs; krow++){
2458           atmp = PetscAbsScalar(*aa);aa++;
2459           row = brow + krow;    /* row index */
2460           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2461           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2462         }
2463       }
2464       aj++;
2465     }
2466   }
2467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatCopy_SeqBAIJ"
2473 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2474 {
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   /* If the two matrices have the same copy implementation, use fast copy. */
2479   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2480     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2481     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2482     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2483 
2484     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2485     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2486     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2487   } else {
2488     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2489   }
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 #undef __FUNCT__
2494 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2495 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2496 {
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2506 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2507 {
2508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2509   PetscFunctionBegin;
2510   *array = a->a;
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2516 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2517 {
2518   PetscFunctionBegin;
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2524 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2525 {
2526   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2527   PetscErrorCode ierr;
2528   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2529   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2530 
2531   PetscFunctionBegin;
2532   if (str == SAME_NONZERO_PATTERN) {
2533     PetscScalar alpha = a;
2534     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2535   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2536     if (y->xtoy && y->XtoY != X) {
2537       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2538       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2539     }
2540     if (!y->xtoy) { /* get xtoy */
2541       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2542       y->XtoY = X;
2543       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2544     }
2545     bs2 = bs*bs;
2546     for (i=0; i<x->nz; i++) {
2547       j = 0;
2548       while (j < bs2){
2549         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2550         j++;
2551       }
2552     }
2553     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2554   } else {
2555     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2556   }
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2562 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2563 {
2564   PetscInt rbs,cbs;
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2569   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2570   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2571   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 #undef __FUNCT__
2576 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2577 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2578 {
2579   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2580   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2581   MatScalar      *aa = a->a;
2582 
2583   PetscFunctionBegin;
2584   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2590 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2591 {
2592   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2593   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2594   MatScalar      *aa = a->a;
2595 
2596   PetscFunctionBegin;
2597   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2605 /*
2606     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2607 */
2608 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2609 {
2610   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2611   PetscErrorCode ierr;
2612   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2613   PetscInt       nz = a->i[m],row,*jj,mr,col;
2614 
2615   PetscFunctionBegin;
2616   *nn = n;
2617   if (!ia) PetscFunctionReturn(0);
2618   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2619   else {
2620     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2621     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2622     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2623     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2624     jj = a->j;
2625     for (i=0; i<nz; i++) {
2626       collengths[jj[i]]++;
2627     }
2628     cia[0] = oshift;
2629     for (i=0; i<n; i++) {
2630       cia[i+1] = cia[i] + collengths[i];
2631     }
2632     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2633     jj   = a->j;
2634     for (row=0; row<m; row++) {
2635       mr = a->i[row+1] - a->i[row];
2636       for (i=0; i<mr; i++) {
2637         col = *jj++;
2638         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2639       }
2640     }
2641     ierr = PetscFree(collengths);CHKERRQ(ierr);
2642     *ia = cia; *ja = cja;
2643   }
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2649 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2650 {
2651   PetscErrorCode ierr;
2652 
2653   PetscFunctionBegin;
2654   if (!ia) PetscFunctionReturn(0);
2655   ierr = PetscFree(*ia);CHKERRQ(ierr);
2656   ierr = PetscFree(*ja);CHKERRQ(ierr);
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 #undef __FUNCT__
2661 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2662 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2663 {
2664   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2665   PetscErrorCode ierr;
2666   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2667   PetscScalar    dx,*y,*xx,*w3_array;
2668   PetscScalar    *vscale_array;
2669   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2670   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2671   void           *fctx = coloring->fctx;
2672   PetscTruth     flg = PETSC_FALSE;
2673   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2674   Vec            x1_tmp;
2675 
2676   PetscFunctionBegin;
2677   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2678   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2679   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2680   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2681 
2682   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2683   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2684   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2685   if (flg) {
2686     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2687   } else {
2688     PetscTruth assembled;
2689     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2690     if (assembled) {
2691       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2692     }
2693   }
2694 
2695   x1_tmp = x1;
2696   if (!coloring->vscale){
2697     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2698   }
2699 
2700   /*
2701     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2702     coloring->F for the coarser grids from the finest
2703   */
2704   if (coloring->F) {
2705     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2706     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2707     if (m1 != m2) {
2708       coloring->F = 0;
2709       }
2710     }
2711 
2712   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2713     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2714   }
2715   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2716 
2717   /* Set w1 = F(x1) */
2718   if (coloring->F) {
2719     w1          = coloring->F; /* use already computed value of function */
2720     coloring->F = 0;
2721   } else {
2722     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2723     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2724     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2725   }
2726 
2727   if (!coloring->w3) {
2728     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2729     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2730   }
2731   w3 = coloring->w3;
2732 
2733     CHKMEMQ;
2734     /* Compute all the local scale factors, including ghost points */
2735   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2736   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2737   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2738   if (ctype == IS_COLORING_GHOSTED){
2739     col_start = 0; col_end = N;
2740   } else if (ctype == IS_COLORING_GLOBAL){
2741     xx = xx - start;
2742     vscale_array = vscale_array - start;
2743     col_start = start; col_end = N + start;
2744   }    CHKMEMQ;
2745   for (col=col_start; col<col_end; col++){
2746     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2747     if (coloring->htype[0] == 'w') {
2748       dx = 1.0 + unorm;
2749     } else {
2750       dx  = xx[col];
2751     }
2752     if (dx == 0.0) dx = 1.0;
2753 #if !defined(PETSC_USE_COMPLEX)
2754     if (dx < umin && dx >= 0.0)      dx = umin;
2755     else if (dx < 0.0 && dx > -umin) dx = -umin;
2756 #else
2757     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2758     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2759 #endif
2760     dx               *= epsilon;
2761     vscale_array[col] = 1.0/dx;
2762   }     CHKMEMQ;
2763   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2764   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2765   if (ctype == IS_COLORING_GLOBAL){
2766     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2767     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2768   }
2769   CHKMEMQ;
2770   if (coloring->vscaleforrow) {
2771     vscaleforrow = coloring->vscaleforrow;
2772   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2773 
2774   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2775   /*
2776     Loop over each color
2777   */
2778   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2779   for (k=0; k<coloring->ncolors; k++) {
2780     coloring->currentcolor = k;
2781     for (i=0; i<bs; i++) {
2782       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2783       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2784       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2785       /*
2786 	Loop over each column associated with color
2787 	adding the perturbation to the vector w3.
2788       */
2789       for (l=0; l<coloring->ncolumns[k]; l++) {
2790 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2791 	if (coloring->htype[0] == 'w') {
2792 	  dx = 1.0 + unorm;
2793 	} else {
2794 	  dx  = xx[col];
2795 	}
2796 	if (dx == 0.0) dx = 1.0;
2797 #if !defined(PETSC_USE_COMPLEX)
2798 	if (dx < umin && dx >= 0.0)      dx = umin;
2799 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2800 #else
2801 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2802 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2803 #endif
2804 	dx            *= epsilon;
2805 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2806 	w3_array[col] += dx;
2807       }
2808       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2809       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2810 
2811       /*
2812 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2813 	w2 = F(x1 + dx) - F(x1)
2814       */
2815       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2816       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2817       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2818       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2819 
2820       /*
2821 	Loop over rows of vector, putting results into Jacobian matrix
2822       */
2823       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2824       for (l=0; l<coloring->nrows[k]; l++) {
2825 	row    = bs*coloring->rows[k][l];             /* local row index */
2826 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2827         for (j=0; j<bs; j++) {
2828   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2829           srows[j]  = row + start + j;
2830         }
2831 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2832       }
2833       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2834     }
2835   } /* endof for each color */
2836   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2837   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2838   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2839   ierr = PetscFree(srows);CHKERRQ(ierr);
2840 
2841   coloring->currentcolor = -1;
2842   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2843   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2844   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 /* -------------------------------------------------------------------*/
2849 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2850        MatGetRow_SeqBAIJ,
2851        MatRestoreRow_SeqBAIJ,
2852        MatMult_SeqBAIJ_N,
2853 /* 4*/ MatMultAdd_SeqBAIJ_N,
2854        MatMultTranspose_SeqBAIJ,
2855        MatMultTransposeAdd_SeqBAIJ,
2856        0,
2857        0,
2858        0,
2859 /*10*/ 0,
2860        MatLUFactor_SeqBAIJ,
2861        0,
2862        0,
2863        MatTranspose_SeqBAIJ,
2864 /*15*/ MatGetInfo_SeqBAIJ,
2865        MatEqual_SeqBAIJ,
2866        MatGetDiagonal_SeqBAIJ,
2867        MatDiagonalScale_SeqBAIJ,
2868        MatNorm_SeqBAIJ,
2869 /*20*/ 0,
2870        MatAssemblyEnd_SeqBAIJ,
2871        MatSetOption_SeqBAIJ,
2872        MatZeroEntries_SeqBAIJ,
2873 /*24*/ MatZeroRows_SeqBAIJ,
2874        0,
2875        0,
2876        0,
2877        0,
2878 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2879        0,
2880        0,
2881        MatGetArray_SeqBAIJ,
2882        MatRestoreArray_SeqBAIJ,
2883 /*34*/ MatDuplicate_SeqBAIJ,
2884        0,
2885        0,
2886        MatILUFactor_SeqBAIJ,
2887        0,
2888 /*39*/ MatAXPY_SeqBAIJ,
2889        MatGetSubMatrices_SeqBAIJ,
2890        MatIncreaseOverlap_SeqBAIJ,
2891        MatGetValues_SeqBAIJ,
2892        MatCopy_SeqBAIJ,
2893 /*44*/ 0,
2894        MatScale_SeqBAIJ,
2895        0,
2896        0,
2897        0,
2898 /*49*/ MatSetBlockSize_SeqBAIJ,
2899        MatGetRowIJ_SeqBAIJ,
2900        MatRestoreRowIJ_SeqBAIJ,
2901        MatGetColumnIJ_SeqBAIJ,
2902        MatRestoreColumnIJ_SeqBAIJ,
2903 /*54*/ MatFDColoringCreate_SeqAIJ,
2904        0,
2905        0,
2906        0,
2907        MatSetValuesBlocked_SeqBAIJ,
2908 /*59*/ MatGetSubMatrix_SeqBAIJ,
2909        MatDestroy_SeqBAIJ,
2910        MatView_SeqBAIJ,
2911        0,
2912        0,
2913 /*64*/ 0,
2914        0,
2915        0,
2916        0,
2917        0,
2918 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2919        0,
2920        MatConvert_Basic,
2921        0,
2922        0,
2923 /*74*/ 0,
2924        MatFDColoringApply_BAIJ,
2925        0,
2926        0,
2927        0,
2928 /*79*/ 0,
2929        0,
2930        0,
2931        0,
2932        MatLoad_SeqBAIJ,
2933 /*84*/ 0,
2934        0,
2935        0,
2936        0,
2937        0,
2938 /*89*/ 0,
2939        0,
2940        0,
2941        0,
2942        0,
2943 /*94*/ 0,
2944        0,
2945        0,
2946        0,
2947        0,
2948 /*99*/0,
2949        0,
2950        0,
2951        0,
2952        0,
2953 /*104*/0,
2954        MatRealPart_SeqBAIJ,
2955        MatImaginaryPart_SeqBAIJ,
2956        0,
2957        0,
2958 /*109*/0,
2959        0,
2960        0,
2961        0,
2962        MatMissingDiagonal_SeqBAIJ,
2963 /*114*/0,
2964        0,
2965        0,
2966        0,
2967        0,
2968 /*119*/0,
2969        0,
2970        MatMultHermitianTranspose_SeqBAIJ,
2971        MatMultHermitianTransposeAdd_SeqBAIJ,
2972        0,
2973 };
2974 
2975 EXTERN_C_BEGIN
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2978 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2979 {
2980   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2981   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2982   PetscErrorCode ierr;
2983 
2984   PetscFunctionBegin;
2985   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2986 
2987   /* allocate space for values if not already there */
2988   if (!aij->saved_values) {
2989     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2990     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2991   }
2992 
2993   /* copy values over */
2994   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2995   PetscFunctionReturn(0);
2996 }
2997 EXTERN_C_END
2998 
2999 EXTERN_C_BEGIN
3000 #undef __FUNCT__
3001 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3002 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
3003 {
3004   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3005   PetscErrorCode ierr;
3006   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3007 
3008   PetscFunctionBegin;
3009   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3010   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3011 
3012   /* copy values over */
3013   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3014   PetscFunctionReturn(0);
3015 }
3016 EXTERN_C_END
3017 
3018 EXTERN_C_BEGIN
3019 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3020 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3021 EXTERN_C_END
3022 
3023 EXTERN_C_BEGIN
3024 #undef __FUNCT__
3025 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3026 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3027 {
3028   Mat_SeqBAIJ    *b;
3029   PetscErrorCode ierr;
3030   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3031   PetscTruth     flg,skipallocation = PETSC_FALSE;
3032 
3033   PetscFunctionBegin;
3034 
3035   if (nz == MAT_SKIP_ALLOCATION) {
3036     skipallocation = PETSC_TRUE;
3037     nz             = 0;
3038   }
3039 
3040   if (bs < 0) {
3041     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3042       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3043     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3044     bs   = PetscAbs(bs);
3045   }
3046   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3047   bs   = newbs;
3048 
3049   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3050   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3051   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3052   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3053 
3054   B->preallocated = PETSC_TRUE;
3055 
3056   mbs  = B->rmap->n/bs;
3057   nbs  = B->cmap->n/bs;
3058   bs2  = bs*bs;
3059 
3060   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3061 
3062   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3063   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3064   if (nnz) {
3065     for (i=0; i<mbs; i++) {
3066       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3067       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3068     }
3069   }
3070 
3071   b       = (Mat_SeqBAIJ*)B->data;
3072   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3073     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3074   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3075 
3076   if (!flg) {
3077     switch (bs) {
3078     case 1:
3079       B->ops->mult            = MatMult_SeqBAIJ_1;
3080       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3081       B->ops->sor             = MatSOR_SeqBAIJ_1;
3082       break;
3083     case 2:
3084       B->ops->mult            = MatMult_SeqBAIJ_2;
3085       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3086       B->ops->sor             = MatSOR_SeqBAIJ_2;
3087       break;
3088     case 3:
3089       B->ops->mult            = MatMult_SeqBAIJ_3;
3090       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3091       B->ops->sor             = MatSOR_SeqBAIJ_3;
3092       break;
3093     case 4:
3094       B->ops->mult            = MatMult_SeqBAIJ_4;
3095       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3096       B->ops->sor             = MatSOR_SeqBAIJ_4;
3097       break;
3098     case 5:
3099       B->ops->mult            = MatMult_SeqBAIJ_5;
3100       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3101       B->ops->sor             = MatSOR_SeqBAIJ_5;
3102       break;
3103     case 6:
3104       B->ops->mult            = MatMult_SeqBAIJ_6;
3105       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3106       B->ops->sor             = MatSOR_SeqBAIJ_6;
3107       break;
3108     case 7:
3109       B->ops->mult            = MatMult_SeqBAIJ_7;
3110       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3111       B->ops->sor             = MatSOR_SeqBAIJ_7;
3112       break;
3113     case 15:
3114       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3115       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3116       B->ops->sor             = MatSOR_SeqBAIJ_N;
3117       break;
3118     default:
3119       B->ops->mult            = MatMult_SeqBAIJ_N;
3120       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3121       B->ops->sor             = MatSOR_SeqBAIJ_N;
3122       break;
3123     }
3124   }
3125   B->rmap->bs  = bs;
3126   b->mbs       = mbs;
3127   b->nbs       = nbs;
3128   if (!skipallocation) {
3129     if (!b->imax) {
3130       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3131       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3132       b->free_imax_ilen = PETSC_TRUE;
3133     }
3134     /* b->ilen will count nonzeros in each block row so far. */
3135     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3136     if (!nnz) {
3137       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3138       else if (nz < 0) nz = 1;
3139       for (i=0; i<mbs; i++) b->imax[i] = nz;
3140       nz = nz*mbs;
3141     } else {
3142       nz = 0;
3143       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3144     }
3145 
3146     /* allocate the matrix space */
3147     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3148     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3149     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3150     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3151     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3152     b->singlemalloc = PETSC_TRUE;
3153     b->i[0] = 0;
3154     for (i=1; i<mbs+1; i++) {
3155       b->i[i] = b->i[i-1] + b->imax[i-1];
3156     }
3157     b->free_a     = PETSC_TRUE;
3158     b->free_ij    = PETSC_TRUE;
3159   } else {
3160     b->free_a     = PETSC_FALSE;
3161     b->free_ij    = PETSC_FALSE;
3162   }
3163 
3164   B->rmap->bs          = bs;
3165   b->bs2              = bs2;
3166   b->mbs              = mbs;
3167   b->nz               = 0;
3168   b->maxnz            = nz;
3169   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3170   PetscFunctionReturn(0);
3171 }
3172 EXTERN_C_END
3173 
3174 EXTERN_C_BEGIN
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3177 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3178 {
3179   PetscInt       i,m,nz,nz_max=0,*nnz;
3180   PetscScalar    *values=0;
3181   PetscErrorCode ierr;
3182 
3183   PetscFunctionBegin;
3184   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3185   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3186   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3187   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3188   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3189   m = B->rmap->n/bs;
3190 
3191   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3192   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3193   for(i=0; i<m; i++) {
3194     nz = ii[i+1]- ii[i];
3195     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3196     nz_max = PetscMax(nz_max, nz);
3197     nnz[i] = nz;
3198   }
3199   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3200   ierr = PetscFree(nnz);CHKERRQ(ierr);
3201 
3202   values = (PetscScalar*)V;
3203   if (!values) {
3204     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3205     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3206   }
3207   for (i=0; i<m; i++) {
3208     PetscInt          ncols  = ii[i+1] - ii[i];
3209     const PetscInt    *icols = jj + ii[i];
3210     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3211     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3212   }
3213   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3214   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3215   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216 
3217   PetscFunctionReturn(0);
3218 }
3219 EXTERN_C_END
3220 
3221 
3222 EXTERN_C_BEGIN
3223 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3224 #if defined(PETSC_HAVE_MUMPS)
3225 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3226 #endif
3227 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3228 EXTERN_C_END
3229 
3230 /*MC
3231    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3232    block sparse compressed row format.
3233 
3234    Options Database Keys:
3235 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3236 
3237   Level: beginner
3238 
3239 .seealso: MatCreateSeqBAIJ()
3240 M*/
3241 
3242 
3243 EXTERN_C_BEGIN
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatCreate_SeqBAIJ"
3246 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3247 {
3248   PetscErrorCode ierr;
3249   PetscMPIInt    size;
3250   Mat_SeqBAIJ    *b;
3251 
3252   PetscFunctionBegin;
3253   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3254   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3255 
3256   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3257   B->data = (void*)b;
3258   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3259   B->mapping               = 0;
3260   b->row                   = 0;
3261   b->col                   = 0;
3262   b->icol                  = 0;
3263   b->reallocs              = 0;
3264   b->saved_values          = 0;
3265 
3266   b->roworiented           = PETSC_TRUE;
3267   b->nonew                 = 0;
3268   b->diag                  = 0;
3269   b->solve_work            = 0;
3270   b->mult_work             = 0;
3271   B->spptr                 = 0;
3272   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3273   b->keepnonzeropattern    = PETSC_FALSE;
3274   b->xtoy                  = 0;
3275   b->XtoY                  = 0;
3276   b->compressedrow.use     = PETSC_FALSE;
3277   b->compressedrow.nrows   = 0;
3278   b->compressedrow.i       = PETSC_NULL;
3279   b->compressedrow.rindex  = PETSC_NULL;
3280   b->compressedrow.checked = PETSC_FALSE;
3281   B->same_nonzero          = PETSC_FALSE;
3282 
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3284                                      "MatGetFactorAvailable_seqbaij_petsc",
3285                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3287                                      "MatGetFactor_seqbaij_petsc",
3288                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3289 #if defined(PETSC_HAVE_MUMPS)
3290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3291 #endif
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3293                                      "MatInvertBlockDiagonal_SeqBAIJ",
3294                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3296                                      "MatStoreValues_SeqBAIJ",
3297                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3298   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3299                                      "MatRetrieveValues_SeqBAIJ",
3300                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3302                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3303                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3305                                      "MatConvert_SeqBAIJ_SeqAIJ",
3306                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3307   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3308                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3309                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3310   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3311                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3312                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3313   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3314                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3315                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3316   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3317   PetscFunctionReturn(0);
3318 }
3319 EXTERN_C_END
3320 
3321 #undef __FUNCT__
3322 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3323 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3324 {
3325   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3326   PetscErrorCode ierr;
3327   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3328 
3329   PetscFunctionBegin;
3330   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3331 
3332   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3333     c->imax = a->imax;
3334     c->ilen = a->ilen;
3335     c->free_imax_ilen = PETSC_FALSE;
3336   } else {
3337     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3338     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3339     for (i=0; i<mbs; i++) {
3340       c->imax[i] = a->imax[i];
3341       c->ilen[i] = a->ilen[i];
3342     }
3343     c->free_imax_ilen = PETSC_TRUE;
3344   }
3345 
3346   /* allocate the matrix space */
3347   if (mallocmatspace){
3348     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3349       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3350       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3351       c->singlemalloc = PETSC_FALSE;
3352       c->free_ij      = PETSC_FALSE;
3353       c->i            = a->i;
3354       c->j            = a->j;
3355       c->parent       = A;
3356       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3357       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3358       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3359     } else {
3360       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3361       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3362       c->singlemalloc = PETSC_TRUE;
3363       c->free_ij      = PETSC_TRUE;
3364       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3365       if (mbs > 0) {
3366 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3367 	if (cpvalues == MAT_COPY_VALUES) {
3368 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3369 	} else {
3370 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3371 	}
3372       }
3373     }
3374   }
3375 
3376   c->roworiented = a->roworiented;
3377   c->nonew       = a->nonew;
3378   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3379   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3380   c->bs2         = a->bs2;
3381   c->mbs         = a->mbs;
3382   c->nbs         = a->nbs;
3383 
3384   if (a->diag) {
3385     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3386       c->diag      = a->diag;
3387       c->free_diag = PETSC_FALSE;
3388     } else {
3389       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3390       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3391       for (i=0; i<mbs; i++) {
3392         c->diag[i] = a->diag[i];
3393       }
3394       c->free_diag = PETSC_TRUE;
3395     }
3396   } else c->diag        = 0;
3397   c->nz                 = a->nz;
3398   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3399   c->solve_work         = 0;
3400   c->mult_work          = 0;
3401   c->free_a             = PETSC_TRUE;
3402   c->free_ij            = PETSC_TRUE;
3403   C->preallocated       = PETSC_TRUE;
3404   C->assembled          = PETSC_TRUE;
3405 
3406   c->compressedrow.use     = a->compressedrow.use;
3407   c->compressedrow.nrows   = a->compressedrow.nrows;
3408   c->compressedrow.checked = a->compressedrow.checked;
3409   if (a->compressedrow.checked && a->compressedrow.use){
3410     i = a->compressedrow.nrows;
3411     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3412     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3413     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3414     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3415   } else {
3416     c->compressedrow.use    = PETSC_FALSE;
3417     c->compressedrow.i      = PETSC_NULL;
3418     c->compressedrow.rindex = PETSC_NULL;
3419   }
3420   C->same_nonzero = A->same_nonzero;
3421   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3422   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3423   PetscFunctionReturn(0);
3424 }
3425 
3426 #undef __FUNCT__
3427 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3428 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3429 {
3430     PetscErrorCode ierr;
3431 
3432   PetscFunctionBegin;
3433   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3434   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3435   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3436   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3437   PetscFunctionReturn(0);
3438 }
3439 
3440 #undef __FUNCT__
3441 #define __FUNCT__ "MatLoad_SeqBAIJ"
3442 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3443 {
3444   Mat_SeqBAIJ    *a;
3445   PetscErrorCode ierr;
3446   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3447   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3448   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3449   PetscInt       *masked,nmask,tmp,bs2,ishift;
3450   PetscMPIInt    size;
3451   int            fd;
3452   PetscScalar    *aa;
3453   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3454 
3455   PetscFunctionBegin;
3456   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3457   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3458   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3459   bs2  = bs*bs;
3460 
3461   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3462   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3463   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3464   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3465   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3466   M = header[1]; N = header[2]; nz = header[3];
3467 
3468   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3469   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3470 
3471   /*
3472      This code adds extra rows to make sure the number of rows is
3473     divisible by the blocksize
3474   */
3475   mbs        = M/bs;
3476   extra_rows = bs - M + bs*(mbs);
3477   if (extra_rows == bs) extra_rows = 0;
3478   else                  mbs++;
3479   if (extra_rows) {
3480     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3481   }
3482 
3483   /* Set global sizes if not already set */
3484   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3485     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3486   } else { /* Check if the matrix global sizes are correct */
3487     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3488     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3489   }
3490 
3491   /* read in row lengths */
3492   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3493   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3494   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3495 
3496   /* read in column indices */
3497   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3498   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3499   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3500 
3501   /* loop over row lengths determining block row lengths */
3502   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3503   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3504   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3505   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3506   rowcount = 0;
3507   nzcount = 0;
3508   for (i=0; i<mbs; i++) {
3509     nmask = 0;
3510     for (j=0; j<bs; j++) {
3511       kmax = rowlengths[rowcount];
3512       for (k=0; k<kmax; k++) {
3513         tmp = jj[nzcount++]/bs;
3514         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3515       }
3516       rowcount++;
3517     }
3518     browlengths[i] += nmask;
3519     /* zero out the mask elements we set */
3520     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3521   }
3522 
3523   /* Do preallocation  */
3524   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3525   a = (Mat_SeqBAIJ*)newmat->data;
3526 
3527   /* set matrix "i" values */
3528   a->i[0] = 0;
3529   for (i=1; i<= mbs; i++) {
3530     a->i[i]      = a->i[i-1] + browlengths[i-1];
3531     a->ilen[i-1] = browlengths[i-1];
3532   }
3533   a->nz         = 0;
3534   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3535 
3536   /* read in nonzero values */
3537   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3538   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3539   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3540 
3541   /* set "a" and "j" values into matrix */
3542   nzcount = 0; jcount = 0;
3543   for (i=0; i<mbs; i++) {
3544     nzcountb = nzcount;
3545     nmask    = 0;
3546     for (j=0; j<bs; j++) {
3547       kmax = rowlengths[i*bs+j];
3548       for (k=0; k<kmax; k++) {
3549         tmp = jj[nzcount++]/bs;
3550 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3551       }
3552     }
3553     /* sort the masked values */
3554     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3555 
3556     /* set "j" values into matrix */
3557     maskcount = 1;
3558     for (j=0; j<nmask; j++) {
3559       a->j[jcount++]  = masked[j];
3560       mask[masked[j]] = maskcount++;
3561     }
3562     /* set "a" values into matrix */
3563     ishift = bs2*a->i[i];
3564     for (j=0; j<bs; j++) {
3565       kmax = rowlengths[i*bs+j];
3566       for (k=0; k<kmax; k++) {
3567         tmp       = jj[nzcountb]/bs ;
3568         block     = mask[tmp] - 1;
3569         point     = jj[nzcountb] - bs*tmp;
3570         idx       = ishift + bs2*block + j + bs*point;
3571         a->a[idx] = (MatScalar)aa[nzcountb++];
3572       }
3573     }
3574     /* zero out the mask elements we set */
3575     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3576   }
3577   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3578 
3579   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3580   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3581   ierr = PetscFree(aa);CHKERRQ(ierr);
3582   ierr = PetscFree(jj);CHKERRQ(ierr);
3583   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3584 
3585   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3586   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3587   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3588   PetscFunctionReturn(0);
3589 }
3590 
3591 #undef __FUNCT__
3592 #define __FUNCT__ "MatCreateSeqBAIJ"
3593 /*@C
3594    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3595    compressed row) format.  For good matrix assembly performance the
3596    user should preallocate the matrix storage by setting the parameter nz
3597    (or the array nnz).  By setting these parameters accurately, performance
3598    during matrix assembly can be increased by more than a factor of 50.
3599 
3600    Collective on MPI_Comm
3601 
3602    Input Parameters:
3603 +  comm - MPI communicator, set to PETSC_COMM_SELF
3604 .  bs - size of block
3605 .  m - number of rows
3606 .  n - number of columns
3607 .  nz - number of nonzero blocks  per block row (same for all rows)
3608 -  nnz - array containing the number of nonzero blocks in the various block rows
3609          (possibly different for each block row) or PETSC_NULL
3610 
3611    Output Parameter:
3612 .  A - the matrix
3613 
3614    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3615    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3616    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3617 
3618    Options Database Keys:
3619 .   -mat_no_unroll - uses code that does not unroll the loops in the
3620                      block calculations (much slower)
3621 .    -mat_block_size - size of the blocks to use
3622 
3623    Level: intermediate
3624 
3625    Notes:
3626    The number of rows and columns must be divisible by blocksize.
3627 
3628    If the nnz parameter is given then the nz parameter is ignored
3629 
3630    A nonzero block is any block that as 1 or more nonzeros in it
3631 
3632    The block AIJ format is fully compatible with standard Fortran 77
3633    storage.  That is, the stored row and column indices can begin at
3634    either one (as in Fortran) or zero.  See the users' manual for details.
3635 
3636    Specify the preallocated storage with either nz or nnz (not both).
3637    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3638    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3639    matrices.
3640 
3641 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3642 @*/
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3644 {
3645   PetscErrorCode ierr;
3646 
3647   PetscFunctionBegin;
3648   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3649   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3650   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3651   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3652   PetscFunctionReturn(0);
3653 }
3654 
3655 #undef __FUNCT__
3656 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3657 /*@C
3658    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3659    per row in the matrix. For good matrix assembly performance the
3660    user should preallocate the matrix storage by setting the parameter nz
3661    (or the array nnz).  By setting these parameters accurately, performance
3662    during matrix assembly can be increased by more than a factor of 50.
3663 
3664    Collective on MPI_Comm
3665 
3666    Input Parameters:
3667 +  A - the matrix
3668 .  bs - size of block
3669 .  nz - number of block nonzeros per block row (same for all rows)
3670 -  nnz - array containing the number of block nonzeros in the various block rows
3671          (possibly different for each block row) or PETSC_NULL
3672 
3673    Options Database Keys:
3674 .   -mat_no_unroll - uses code that does not unroll the loops in the
3675                      block calculations (much slower)
3676 .    -mat_block_size - size of the blocks to use
3677 
3678    Level: intermediate
3679 
3680    Notes:
3681    If the nnz parameter is given then the nz parameter is ignored
3682 
3683    You can call MatGetInfo() to get information on how effective the preallocation was;
3684    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3685    You can also run with the option -info and look for messages with the string
3686    malloc in them to see if additional memory allocation was needed.
3687 
3688    The block AIJ format is fully compatible with standard Fortran 77
3689    storage.  That is, the stored row and column indices can begin at
3690    either one (as in Fortran) or zero.  See the users' manual for details.
3691 
3692    Specify the preallocated storage with either nz or nnz (not both).
3693    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3694    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3695 
3696 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3697 @*/
3698 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3699 {
3700   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3701 
3702   PetscFunctionBegin;
3703   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3704   if (f) {
3705     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3706   }
3707   PetscFunctionReturn(0);
3708 }
3709 
3710 #undef __FUNCT__
3711 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3712 /*@C
3713    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3714    (the default sequential PETSc format).
3715 
3716    Collective on MPI_Comm
3717 
3718    Input Parameters:
3719 +  A - the matrix
3720 .  i - the indices into j for the start of each local row (starts with zero)
3721 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3722 -  v - optional values in the matrix
3723 
3724    Level: developer
3725 
3726 .keywords: matrix, aij, compressed row, sparse
3727 
3728 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3729 @*/
3730 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3731 {
3732   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3733 
3734   PetscFunctionBegin;
3735   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3736   if (f) {
3737     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3738   }
3739   PetscFunctionReturn(0);
3740 }
3741 
3742 
3743 #undef __FUNCT__
3744 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3745 /*@
3746      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3747 
3748      Collective on MPI_Comm
3749 
3750    Input Parameters:
3751 +  comm - must be an MPI communicator of size 1
3752 .  bs - size of block
3753 .  m - number of rows
3754 .  n - number of columns
3755 .  i - row indices
3756 .  j - column indices
3757 -  a - matrix values
3758 
3759    Output Parameter:
3760 .  mat - the matrix
3761 
3762    Level: advanced
3763 
3764    Notes:
3765        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3766     once the matrix is destroyed
3767 
3768        You cannot set new nonzero locations into this matrix, that will generate an error.
3769 
3770        The i and j indices are 0 based
3771 
3772        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3773 
3774 
3775 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3776 
3777 @*/
3778 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3779 {
3780   PetscErrorCode ierr;
3781   PetscInt       ii;
3782   Mat_SeqBAIJ    *baij;
3783 
3784   PetscFunctionBegin;
3785   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3786   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3787 
3788   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3789   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3790   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3791   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3792   baij = (Mat_SeqBAIJ*)(*mat)->data;
3793   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3794   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3795 
3796   baij->i = i;
3797   baij->j = j;
3798   baij->a = a;
3799   baij->singlemalloc = PETSC_FALSE;
3800   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3801   baij->free_a       = PETSC_FALSE;
3802   baij->free_ij       = PETSC_FALSE;
3803 
3804   for (ii=0; ii<m; ii++) {
3805     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3806 #if defined(PETSC_USE_DEBUG)
3807     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3808 #endif
3809   }
3810 #if defined(PETSC_USE_DEBUG)
3811   for (ii=0; ii<baij->i[m]; ii++) {
3812     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3813     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3814   }
3815 #endif
3816 
3817   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3818   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3819   PetscFunctionReturn(0);
3820 }
3821