xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 12fe8d350da2a1fb5ce0234d2abcef2a979c3670)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
49   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
50   PetscReal      shift = 0.0;
51 
52   PetscFunctionBegin;
53   if (a->idiagvalid) PetscFunctionReturn(0);
54   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
55   diag_offset = a->diag;
56   if (!a->idiag) {
57     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
58     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs2*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     case 7:
129       for (i=0; i<mbs; i++) {
130         odiag = v + 49*diag_offset[i];
131         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
132         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
133 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
134 	diag  += 49;
135 	mdiag += 49;
136       }
137       break;
138     default:
139       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
140       for (i=0; i<mbs; i++) {
141         odiag = v + bs2*diag_offset[i];
142         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
143         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
144         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
145 	diag  += bs2;
146 	mdiag += bs2;
147       }
148       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
149   }
150   a->idiagvalid = PETSC_TRUE;
151   PetscFunctionReturn(0);
152 }
153 EXTERN_C_END
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
157 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
158 {
159   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
160   PetscScalar        *x,x1,s1;
161   const PetscScalar  *b;
162   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
163   PetscErrorCode     ierr;
164   PetscInt           m = a->mbs,i,i2,nz,j;
165   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
166 
167   PetscFunctionBegin;
168   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
169   its = its*lits;
170   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
171   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
172   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
173   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
174   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
175 
176   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
177 
178   diag  = a->diag;
179   idiag = a->idiag;
180   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
181   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
182 
183   if (flag & SOR_ZERO_INITIAL_GUESS) {
184     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
185       x[0] = b[0]*idiag[0];
186       i2     = 1;
187       idiag += 1;
188       for (i=1; i<m; i++) {
189         v     = aa + ai[i];
190         vi    = aj + ai[i];
191         nz    = diag[i] - ai[i];
192         s1    = b[i2];
193         for (j=0; j<nz; j++) {
194           s1 -= v[j]*x[vi[j]];
195         }
196         x[i2]   = idiag[0]*s1;
197         idiag   += 1;
198         i2      += 1;
199       }
200       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
201       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
202     }
203     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
204         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
205       i2    = 0;
206       mdiag = a->idiag+a->mbs;
207       for (i=0; i<m; i++) {
208         x1      = x[i2];
209         x[i2]   = mdiag[0]*x1;
210         mdiag  += 1;
211         i2     += 1;
212       }
213       ierr = PetscLogFlops(m);CHKERRQ(ierr);
214     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
215       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
216     }
217     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
218       idiag   = a->idiag+a->mbs - 1;
219       i2      = m - 1;
220       x1      = x[i2];
221       x[i2]   = idiag[0]*x1;
222       idiag -= 1;
223       i2    -= 1;
224       for (i=m-2; i>=0; i--) {
225         v     = aa + (diag[i]+1);
226         vi    = aj + diag[i] + 1;
227         nz    = ai[i+1] - diag[i] - 1;
228         s1    = x[i2];
229         for (j=0; j<nz; j++) {
230           s1 -= v[j]*x[vi[j]];
231         }
232         x[i2]   = idiag[0]*s1;
233         idiag   -= 1;
234         i2      -= 1;
235       }
236       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
237     }
238   } else {
239     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
240   }
241   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
248 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
249 {
250   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
251   PetscScalar        *x,x1,x2,s1,s2;
252   const PetscScalar  *b;
253   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
254   PetscErrorCode     ierr;
255   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
256   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
257 
258   PetscFunctionBegin;
259   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
260   its = its*lits;
261   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
262   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
263   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
264   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
265   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
266 
267   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
268 
269   diag  = a->diag;
270   idiag = a->idiag;
271   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
272   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
273 
274   if (flag & SOR_ZERO_INITIAL_GUESS) {
275     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
276       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
277       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
278       i2     = 2;
279       idiag += 4;
280       for (i=1; i<m; i++) {
281 	v     = aa + 4*ai[i];
282 	vi    = aj + ai[i];
283 	nz    = diag[i] - ai[i];
284 	s1    = b[i2]; s2 = b[i2+1];
285         for (j=0; j<nz; j++) {
286 	  idx  = 2*vi[j];
287           it   = 4*j;
288 	  x1   = x[idx]; x2 = x[1+idx];
289 	  s1  -= v[it]*x1 + v[it+2]*x2;
290 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
291 	}
292 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
293 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
294         idiag   += 4;
295         i2      += 2;
296       }
297       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
298       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
299     }
300     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
301         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
302       i2    = 0;
303       mdiag = a->idiag+4*a->mbs;
304       for (i=0; i<m; i++) {
305         x1      = x[i2]; x2 = x[i2+1];
306         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
307         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
308         mdiag  += 4;
309         i2     += 2;
310       }
311       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
312     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
313       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
314     }
315     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
316       idiag   = a->idiag+4*a->mbs - 4;
317       i2      = 2*m - 2;
318       x1      = x[i2]; x2 = x[i2+1];
319       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
320       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
321       idiag -= 4;
322       i2    -= 2;
323       for (i=m-2; i>=0; i--) {
324 	v     = aa + 4*(diag[i]+1);
325 	vi    = aj + diag[i] + 1;
326 	nz    = ai[i+1] - diag[i] - 1;
327 	s1    = x[i2]; s2 = x[i2+1];
328         for (j=0; j<nz; j++) {
329  	  idx  = 2*vi[j];
330           it   = 4*j;
331 	  x1   = x[idx]; x2 = x[1+idx];
332 	  s1  -= v[it]*x1 + v[it+2]*x2;
333 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
334 	}
335 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
336 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
337         idiag   -= 4;
338         i2      -= 2;
339       }
340       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
341     }
342   } else {
343     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
344   }
345   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
346   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
352 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
353 {
354   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
355   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
356   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
357   const PetscScalar  *b;
358   PetscErrorCode     ierr;
359   PetscInt           m = a->mbs,i,i2,nz,idx;
360   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
361 
362   PetscFunctionBegin;
363   its = its*lits;
364   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
365   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
366   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
367   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
368   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
369   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
370 
371   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
372 
373   diag  = a->diag;
374   idiag = a->idiag;
375   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
376   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
377 
378   if (flag & SOR_ZERO_INITIAL_GUESS) {
379     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
380       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
381       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
382       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
383       i2     = 3;
384       idiag += 9;
385       for (i=1; i<m; i++) {
386         v     = aa + 9*ai[i];
387         vi    = aj + ai[i];
388         nz    = diag[i] - ai[i];
389         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
390         while (nz--) {
391           idx  = 3*(*vi++);
392           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
393           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
394           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
395           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
396           v   += 9;
397         }
398         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
399         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
400         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
401         idiag   += 9;
402         i2      += 3;
403       }
404       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
405       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
406     }
407     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
408         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
409       i2    = 0;
410       mdiag = a->idiag+9*a->mbs;
411       for (i=0; i<m; i++) {
412         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
413         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
414         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
415         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
416         mdiag  += 9;
417         i2     += 3;
418       }
419       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
420     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
421       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
422     }
423     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
424       idiag   = a->idiag+9*a->mbs - 9;
425       i2      = 3*m - 3;
426       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
427       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
428       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
429       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
430       idiag -= 9;
431       i2    -= 3;
432       for (i=m-2; i>=0; i--) {
433         v     = aa + 9*(diag[i]+1);
434         vi    = aj + diag[i] + 1;
435         nz    = ai[i+1] - diag[i] - 1;
436         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
437         while (nz--) {
438           idx  = 3*(*vi++);
439           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
440           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
441           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
442           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
443           v   += 9;
444         }
445         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
446         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
447         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
448         idiag   -= 9;
449         i2      -= 3;
450       }
451       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
452     }
453   } else {
454     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
455   }
456   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
457   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
463 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
464 {
465   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
466   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
467   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
468   const PetscScalar  *b;
469   PetscErrorCode     ierr;
470   PetscInt           m = a->mbs,i,i2,nz,idx;
471   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
472 
473   PetscFunctionBegin;
474   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
475   its = its*lits;
476   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
477   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
478   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
479   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
480   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
481 
482   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
483 
484   diag  = a->diag;
485   idiag = a->idiag;
486   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
487   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
488 
489   if (flag & SOR_ZERO_INITIAL_GUESS) {
490     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
491       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
492       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
493       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
494       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
495       i2     = 4;
496       idiag += 16;
497       for (i=1; i<m; i++) {
498 	v     = aa + 16*ai[i];
499 	vi    = aj + ai[i];
500 	nz    = diag[i] - ai[i];
501 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
502 	while (nz--) {
503 	  idx  = 4*(*vi++);
504 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
505 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
506 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
507 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
508 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
509 	  v   += 16;
510 	}
511 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
512 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
513 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
514 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
515         idiag   += 16;
516         i2      += 4;
517       }
518       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
519       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
520     }
521     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
522         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
523       i2    = 0;
524       mdiag = a->idiag+16*a->mbs;
525       for (i=0; i<m; i++) {
526         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
527         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
528         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
529         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
530         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
531         mdiag  += 16;
532         i2     += 4;
533       }
534       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
535     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
536       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
537     }
538     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
539       idiag   = a->idiag+16*a->mbs - 16;
540       i2      = 4*m - 4;
541       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
542       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
543       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
544       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
545       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
546       idiag -= 16;
547       i2    -= 4;
548       for (i=m-2; i>=0; i--) {
549 	v     = aa + 16*(diag[i]+1);
550 	vi    = aj + diag[i] + 1;
551 	nz    = ai[i+1] - diag[i] - 1;
552 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
553 	while (nz--) {
554 	  idx  = 4*(*vi++);
555 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
556 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
557 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
558 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
559 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
560 	  v   += 16;
561 	}
562 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
563 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
564 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
565 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
566         idiag   -= 16;
567         i2      -= 4;
568       }
569       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
570     }
571   } else {
572     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
573   }
574   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
575   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
581 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
582 {
583   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
584   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
585   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
586   const PetscScalar  *b;
587   PetscErrorCode     ierr;
588   PetscInt           m = a->mbs,i,i2,nz,idx;
589   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
590 
591   PetscFunctionBegin;
592   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
593   its = its*lits;
594   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
595   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
596   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
597   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
598   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
599 
600   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
601 
602   diag  = a->diag;
603   idiag = a->idiag;
604   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
605   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
606 
607   if (flag & SOR_ZERO_INITIAL_GUESS) {
608     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
609       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
610       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
611       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
612       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
613       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
614       i2     = 5;
615       idiag += 25;
616       for (i=1; i<m; i++) {
617 	v     = aa + 25*ai[i];
618 	vi    = aj + ai[i];
619 	nz    = diag[i] - ai[i];
620 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
621 	while (nz--) {
622 	  idx  = 5*(*vi++);
623 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
624 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
625 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
626 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
627 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
628 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
629 	  v   += 25;
630 	}
631 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
632 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
633 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
634 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
635 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
636         idiag   += 25;
637         i2      += 5;
638       }
639       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
640       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
641     }
642     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
643         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
644       i2    = 0;
645       mdiag = a->idiag+25*a->mbs;
646       for (i=0; i<m; i++) {
647         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
648         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
649         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
650         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
651         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
652         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
653         mdiag  += 25;
654         i2     += 5;
655       }
656       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
657     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
658       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
659     }
660     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
661       idiag   = a->idiag+25*a->mbs - 25;
662       i2      = 5*m - 5;
663       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
664       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
665       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
666       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
667       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
668       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
669       idiag -= 25;
670       i2    -= 5;
671       for (i=m-2; i>=0; i--) {
672 	v     = aa + 25*(diag[i]+1);
673 	vi    = aj + diag[i] + 1;
674 	nz    = ai[i+1] - diag[i] - 1;
675 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
676 	while (nz--) {
677 	  idx  = 5*(*vi++);
678 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
679 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
680 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
681 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
682 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
683 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
684 	  v   += 25;
685 	}
686 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
687 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
688 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
689 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
690 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
691         idiag   -= 25;
692         i2      -= 5;
693       }
694       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
695     }
696   } else {
697     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
698   }
699   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
700   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
706 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
707 {
708   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
709   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
710   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
711   const PetscScalar  *b;
712   PetscErrorCode     ierr;
713   PetscInt           m = a->mbs,i,i2,nz,idx;
714   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
715 
716   PetscFunctionBegin;
717   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
718   its = its*lits;
719   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
720   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
721   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
722   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
723   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
724 
725   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
726 
727   diag  = a->diag;
728   idiag = a->idiag;
729   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
730   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
731 
732   if (flag & SOR_ZERO_INITIAL_GUESS) {
733     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
734       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
735       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
736       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
737       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
738       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
739       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
740       i2     = 6;
741       idiag += 36;
742       for (i=1; i<m; i++) {
743         v     = aa + 36*ai[i];
744         vi    = aj + ai[i];
745         nz    = diag[i] - ai[i];
746         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
747         while (nz--) {
748           idx  = 6*(*vi++);
749           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
750           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
751           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
752           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
753           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
754           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
755           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
756           v   += 36;
757         }
758         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
759         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
760         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
761         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
762         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
763         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
764         idiag   += 36;
765         i2      += 6;
766       }
767       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
768       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
769     }
770     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
771         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
772       i2    = 0;
773       mdiag = a->idiag+36*a->mbs;
774       for (i=0; i<m; i++) {
775         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
776         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
777         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
778         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
779         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
780         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
781         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
782         mdiag  += 36;
783         i2     += 6;
784       }
785       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
786     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
787       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
788     }
789     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
790       idiag   = a->idiag+36*a->mbs - 36;
791       i2      = 6*m - 6;
792       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
793       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
794       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
795       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
796       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
797       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
798       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
799       idiag -= 36;
800       i2    -= 6;
801       for (i=m-2; i>=0; i--) {
802         v     = aa + 36*(diag[i]+1);
803         vi    = aj + diag[i] + 1;
804         nz    = ai[i+1] - diag[i] - 1;
805         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
806         while (nz--) {
807           idx  = 6*(*vi++);
808           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
809           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
810           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
811           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
812           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
813           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
814           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
815           v   += 36;
816         }
817         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
818         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
819         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
820         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
821         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
822         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
823         idiag   -= 36;
824         i2      -= 6;
825       }
826       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
827     }
828   } else {
829     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
830   }
831   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
832   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
838 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
839 {
840   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
841   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
842   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
843   const PetscScalar  *b;
844   PetscErrorCode     ierr;
845   PetscInt           m = a->mbs,i,i2,nz,idx;
846   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
847 
848   PetscFunctionBegin;
849   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
850   its = its*lits;
851   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
852   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
853   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
854   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
855   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
856 
857   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
858 
859   diag  = a->diag;
860   idiag = a->idiag;
861   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
862   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
863 
864   if (flag & SOR_ZERO_INITIAL_GUESS) {
865     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
866       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
867       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
868       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
869       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
870       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
871       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
872       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
873       i2     = 7;
874       idiag += 49;
875       for (i=1; i<m; i++) {
876         v     = aa + 49*ai[i];
877         vi    = aj + ai[i];
878         nz    = diag[i] - ai[i];
879         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
880         while (nz--) {
881           idx  = 7*(*vi++);
882           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
883           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
884           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
885           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
886           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
887           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
888           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
889           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
890           v   += 49;
891         }
892         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
893         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
894         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
895         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
896         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
897         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
898         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
899         idiag   += 49;
900         i2      += 7;
901       }
902       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
903       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
904     }
905     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
906         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
907       i2    = 0;
908       mdiag = a->idiag+49*a->mbs;
909       for (i=0; i<m; i++) {
910         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
911         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
912         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
913         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
914         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
915         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
916         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
917         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
918         mdiag  += 49;
919         i2     += 7;
920       }
921       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
922     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
923       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
924     }
925     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
926       idiag   = a->idiag+49*a->mbs - 49;
927       i2      = 7*m - 7;
928       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
929       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
930       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
931       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
932       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
933       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
934       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
935       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
936       idiag -= 49;
937       i2    -= 7;
938       for (i=m-2; i>=0; i--) {
939         v     = aa + 49*(diag[i]+1);
940         vi    = aj + diag[i] + 1;
941         nz    = ai[i+1] - diag[i] - 1;
942         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
943         while (nz--) {
944           idx  = 7*(*vi++);
945           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
946           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
947           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
948           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
949           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
950           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
951           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
952           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
953           v   += 49;
954         }
955         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
956         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
957         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
958         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
959         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
960         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
961         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
962         idiag   -= 49;
963         i2      -= 7;
964       }
965       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
966     }
967   } else {
968     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
969   }
970   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
977 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
978 {
979   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
980   PetscScalar        *x,*work,*w,*workt;
981   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
982   const PetscScalar  *b;
983   PetscErrorCode     ierr;
984   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
985   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
986 
987   PetscFunctionBegin;
988   its = its*lits;
989   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
990   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
991   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
992   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
993   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
994   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
995 
996   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
997 
998   diag  = a->diag;
999   idiag = a->idiag;
1000   if (!a->mult_work) {
1001     k    = PetscMax(A->rmap->n,A->cmap->n);
1002     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1003   }
1004   work = a->mult_work;
1005   if (!a->sor_work) {
1006     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1007   }
1008   w = a->sor_work;
1009 
1010   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1011   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1012 
1013   if (flag & SOR_ZERO_INITIAL_GUESS) {
1014     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1015       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1016       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1017       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1018       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1019       i2     = bs;
1020       idiag += bs2;
1021       for (i=1; i<m; i++) {
1022         v     = aa + bs2*ai[i];
1023         vi    = aj + ai[i];
1024         nz    = diag[i] - ai[i];
1025 
1026         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1027 	/* copy all rows of x that are needed into contiguous space */
1028         workt = work;
1029         for (j=0; j<nz; j++) {
1030           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1031           workt += bs;
1032         }
1033         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1034        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1035         while (nz--) {
1036           idx  = N*(*vi++);
1037           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1038           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1039           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1040           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1041           v   += N2;
1042 	  } */
1043 
1044         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1045         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1046         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1047         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1048 
1049         idiag   += bs2;
1050         i2      += bs;
1051       }
1052       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1053       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1054     }
1055     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1056         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1057       i2    = 0;
1058       mdiag = a->idiag+bs2*a->mbs;
1059       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1060       for (i=0; i<m; i++) {
1061         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1062         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1063         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1064         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1065         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1066 
1067         mdiag  += bs2;
1068         i2     += bs;
1069       }
1070       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1071     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1072       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1073     }
1074     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1075       idiag   = a->idiag+bs2*a->mbs - bs2;
1076       i2      = bs*m - bs;
1077       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1078       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1079       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1080       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1081       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1082       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1083       idiag -= bs2;
1084       i2    -= bs;
1085       for (i=m-2; i>=0; i--) {
1086         v     = aa + bs2*(diag[i]+1);
1087         vi    = aj + diag[i] + 1;
1088         nz    = ai[i+1] - diag[i] - 1;
1089 
1090         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1091 	/* copy all rows of x that are needed into contiguous space */
1092         workt = work;
1093         for (j=0; j<nz; j++) {
1094           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1095           workt += bs;
1096         }
1097         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1098         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1099         while (nz--) {
1100           idx  = N*(*vi++);
1101           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1102           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1103           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1104           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1105           v   += N2;
1106 	  } */
1107 
1108         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1109         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1110         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1111         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1112         idiag   -= bs2;
1113         i2      -= bs;
1114       }
1115       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1116     }
1117   } else {
1118     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1119   }
1120   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1121   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*
1126     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1127 */
1128 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1129 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1131 #define matsetvaluesblocked4_ matsetvaluesblocked4
1132 #endif
1133 
1134 EXTERN_C_BEGIN
1135 #undef __FUNCT__
1136 #define __FUNCT__ "matsetvaluesblocked4_"
1137 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1138 {
1139   Mat               A = *AA;
1140   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1141   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1142   PetscInt          *ai=a->i,*ailen=a->ilen;
1143   PetscInt          *aj=a->j,stepval,lastcol = -1;
1144   const PetscScalar *value = v;
1145   MatScalar         *ap,*aa = a->a,*bap;
1146 
1147   PetscFunctionBegin;
1148   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1149   stepval = (n-1)*4;
1150   for (k=0; k<m; k++) { /* loop over added rows */
1151     row  = im[k];
1152     rp   = aj + ai[row];
1153     ap   = aa + 16*ai[row];
1154     nrow = ailen[row];
1155     low  = 0;
1156     high = nrow;
1157     for (l=0; l<n; l++) { /* loop over added columns */
1158       col = in[l];
1159       if (col <= lastcol) low = 0; else high = nrow;
1160       lastcol = col;
1161       value = v + k*(stepval+4 + l)*4;
1162       while (high-low > 7) {
1163         t = (low+high)/2;
1164         if (rp[t] > col) high = t;
1165         else             low  = t;
1166       }
1167       for (i=low; i<high; i++) {
1168         if (rp[i] > col) break;
1169         if (rp[i] == col) {
1170           bap  = ap +  16*i;
1171           for (ii=0; ii<4; ii++,value+=stepval) {
1172             for (jj=ii; jj<16; jj+=4) {
1173               bap[jj] += *value++;
1174             }
1175           }
1176           goto noinsert2;
1177         }
1178       }
1179       N = nrow++ - 1;
1180       high++; /* added new column index thus must search to one higher than before */
1181       /* shift up all the later entries in this row */
1182       for (ii=N; ii>=i; ii--) {
1183         rp[ii+1] = rp[ii];
1184         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1185       }
1186       if (N >= i) {
1187         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1188       }
1189       rp[i] = col;
1190       bap   = ap +  16*i;
1191       for (ii=0; ii<4; ii++,value+=stepval) {
1192         for (jj=ii; jj<16; jj+=4) {
1193           bap[jj] = *value++;
1194         }
1195       }
1196       noinsert2:;
1197       low = i;
1198     }
1199     ailen[row] = nrow;
1200   }
1201   PetscFunctionReturnVoid();
1202 }
1203 EXTERN_C_END
1204 
1205 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1206 #define matsetvalues4_ MATSETVALUES4
1207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1208 #define matsetvalues4_ matsetvalues4
1209 #endif
1210 
1211 EXTERN_C_BEGIN
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatSetValues4_"
1214 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1215 {
1216   Mat         A = *AA;
1217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1218   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1219   PetscInt    *ai=a->i,*ailen=a->ilen;
1220   PetscInt    *aj=a->j,brow,bcol;
1221   PetscInt    ridx,cidx,lastcol = -1;
1222   MatScalar   *ap,value,*aa=a->a,*bap;
1223 
1224   PetscFunctionBegin;
1225   for (k=0; k<m; k++) { /* loop over added rows */
1226     row  = im[k]; brow = row/4;
1227     rp   = aj + ai[brow];
1228     ap   = aa + 16*ai[brow];
1229     nrow = ailen[brow];
1230     low  = 0;
1231     high = nrow;
1232     for (l=0; l<n; l++) { /* loop over added columns */
1233       col = in[l]; bcol = col/4;
1234       ridx = row % 4; cidx = col % 4;
1235       value = v[l + k*n];
1236       if (col <= lastcol) low = 0; else high = nrow;
1237       lastcol = col;
1238       while (high-low > 7) {
1239         t = (low+high)/2;
1240         if (rp[t] > bcol) high = t;
1241         else              low  = t;
1242       }
1243       for (i=low; i<high; i++) {
1244         if (rp[i] > bcol) break;
1245         if (rp[i] == bcol) {
1246           bap  = ap +  16*i + 4*cidx + ridx;
1247           *bap += value;
1248           goto noinsert1;
1249         }
1250       }
1251       N = nrow++ - 1;
1252       high++; /* added new column thus must search to one higher than before */
1253       /* shift up all the later entries in this row */
1254       for (ii=N; ii>=i; ii--) {
1255         rp[ii+1] = rp[ii];
1256         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1257       }
1258       if (N>=i) {
1259         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1260       }
1261       rp[i]                    = bcol;
1262       ap[16*i + 4*cidx + ridx] = value;
1263       noinsert1:;
1264       low = i;
1265     }
1266     ailen[brow] = nrow;
1267   }
1268   PetscFunctionReturnVoid();
1269 }
1270 EXTERN_C_END
1271 
1272 /*
1273      Checks for missing diagonals
1274 */
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1277 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1278 {
1279   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1280   PetscErrorCode ierr;
1281   PetscInt       *diag,*jj = a->j,i;
1282 
1283   PetscFunctionBegin;
1284   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1285   *missing = PETSC_FALSE;
1286   if (A->rmap->n > 0 && !jj) {
1287     *missing  = PETSC_TRUE;
1288     if (d) *d = 0;
1289     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1290   } else {
1291     diag     = a->diag;
1292     for (i=0; i<a->mbs; i++) {
1293       if (jj[diag[i]] != i) {
1294         *missing  = PETSC_TRUE;
1295         if (d) *d = i;
1296         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1297       }
1298     }
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1305 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1306 {
1307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       i,j,m = a->mbs;
1310 
1311   PetscFunctionBegin;
1312   if (!a->diag) {
1313     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1314     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1315     a->free_diag = PETSC_TRUE;
1316   }
1317   for (i=0; i<m; i++) {
1318     a->diag[i] = a->i[i+1];
1319     for (j=a->i[i]; j<a->i[i+1]; j++) {
1320       if (a->j[j] == i) {
1321         a->diag[i] = j;
1322         break;
1323       }
1324     }
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 
1330 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1334 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1335 {
1336   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1337   PetscErrorCode ierr;
1338   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1339   PetscInt       *tia, *tja;
1340 
1341   PetscFunctionBegin;
1342   *nn = n;
1343   if (!ia) PetscFunctionReturn(0);
1344   if (symmetric) {
1345     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1346     nz   = tia[n];
1347   } else {
1348     tia = a->i; tja = a->j;
1349   }
1350 
1351   if (!blockcompressed && bs > 1) {
1352     (*nn) *= bs;
1353     /* malloc & create the natural set of indices */
1354     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1355     if (n) {
1356       (*ia)[0] = 0;
1357       for (j=1; j<bs; j++) {
1358         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1359       }
1360     }
1361 
1362     for (i=1; i<n; i++) {
1363       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1364       for (j=1; j<bs; j++) {
1365         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1366       }
1367     }
1368     if (n) {
1369       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1370     }
1371 
1372     if (ja) {
1373       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1374       cnt = 0;
1375       for (i=0; i<n; i++) {
1376         for (j=0; j<bs; j++) {
1377           for (k=tia[i]; k<tia[i+1]; k++) {
1378             for (l=0; l<bs; l++) {
1379               (*ja)[cnt++] = bs*tja[k] + l;
1380 	    }
1381           }
1382         }
1383       }
1384     }
1385 
1386     n     *= bs;
1387     nz *= bs*bs;
1388     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1389       ierr = PetscFree(tia);CHKERRQ(ierr);
1390       ierr = PetscFree(tja);CHKERRQ(ierr);
1391     }
1392   } else if (oshift == 1) {
1393     if (symmetric) {
1394       PetscInt nz = tia[A->rmap->n/bs];
1395       /*  add 1 to i and j indices */
1396       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1397       *ia = tia;
1398       if (ja) {
1399 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1400         *ja = tja;
1401       }
1402     } else {
1403       PetscInt nz = a->i[A->rmap->n/bs];
1404       /* malloc space and  add 1 to i and j indices */
1405       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1406       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1407       if (ja) {
1408 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1409 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1410       }
1411     }
1412   } else {
1413     *ia = tia;
1414     if (ja) *ja = tja;
1415   }
1416 
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1422 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1423 {
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   if (!ia) PetscFunctionReturn(0);
1428   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1429     ierr = PetscFree(*ia);CHKERRQ(ierr);
1430     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1437 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441 
1442   PetscFunctionBegin;
1443 #if defined(PETSC_USE_LOG)
1444   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1445 #endif
1446   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1447   if (a->row) {
1448     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1449   }
1450   if (a->col) {
1451     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1452   }
1453   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1454   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1455   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1456   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1457   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1458   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1459   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1460   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1461   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1462   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1463 
1464   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1465   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1466   ierr = PetscFree(a);CHKERRQ(ierr);
1467 
1468   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1471   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1472   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1473   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1474   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1475   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1476   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1482 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1483 {
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   switch (op) {
1489   case MAT_ROW_ORIENTED:
1490     a->roworiented    = flg;
1491     break;
1492   case MAT_KEEP_NONZERO_PATTERN:
1493     a->keepnonzeropattern = flg;
1494     break;
1495   case MAT_NEW_NONZERO_LOCATIONS:
1496     a->nonew          = (flg ? 0 : 1);
1497     break;
1498   case MAT_NEW_NONZERO_LOCATION_ERR:
1499     a->nonew          = (flg ? -1 : 0);
1500     break;
1501   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1502     a->nonew          = (flg ? -2 : 0);
1503     break;
1504   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1505     a->nounused       = (flg ? -1 : 0);
1506     break;
1507   case MAT_NEW_DIAGONALS:
1508   case MAT_IGNORE_OFF_PROC_ENTRIES:
1509   case MAT_USE_HASH_TABLE:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   case MAT_SYMMETRIC:
1513   case MAT_STRUCTURALLY_SYMMETRIC:
1514   case MAT_HERMITIAN:
1515   case MAT_SYMMETRY_ETERNAL:
1516     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1517     break;
1518   default:
1519     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1526 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1527 {
1528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1529   PetscErrorCode ierr;
1530   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1531   MatScalar      *aa,*aa_i;
1532   PetscScalar    *v_i;
1533 
1534   PetscFunctionBegin;
1535   bs  = A->rmap->bs;
1536   ai  = a->i;
1537   aj  = a->j;
1538   aa  = a->a;
1539   bs2 = a->bs2;
1540 
1541   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1542 
1543   bn  = row/bs;   /* Block number */
1544   bp  = row % bs; /* Block Position */
1545   M   = ai[bn+1] - ai[bn];
1546   *nz = bs*M;
1547 
1548   if (v) {
1549     *v = 0;
1550     if (*nz) {
1551       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1552       for (i=0; i<M; i++) { /* for each block in the block row */
1553         v_i  = *v + i*bs;
1554         aa_i = aa + bs2*(ai[bn] + i);
1555         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1556       }
1557     }
1558   }
1559 
1560   if (idx) {
1561     *idx = 0;
1562     if (*nz) {
1563       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1564       for (i=0; i<M; i++) { /* for each block in the block row */
1565         idx_i = *idx + i*bs;
1566         itmp  = bs*aj[ai[bn] + i];
1567         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1568       }
1569     }
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1576 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1577 {
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1582   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1590 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1591 {
1592   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1593   Mat            C;
1594   PetscErrorCode ierr;
1595   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1596   PetscInt       *rows,*cols,bs2=a->bs2;
1597   MatScalar      *array;
1598 
1599   PetscFunctionBegin;
1600   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1601   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1602     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1603     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1604 
1605     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1606     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1607     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1608     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1609     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1610     ierr = PetscFree(col);CHKERRQ(ierr);
1611   } else {
1612     C = *B;
1613   }
1614 
1615   array = a->a;
1616   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1617   for (i=0; i<mbs; i++) {
1618     cols[0] = i*bs;
1619     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1620     len = ai[i+1] - ai[i];
1621     for (j=0; j<len; j++) {
1622       rows[0] = (*aj++)*bs;
1623       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1624       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1625       array += bs2;
1626     }
1627   }
1628   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1629 
1630   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1631   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632 
1633   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1634     *B = C;
1635   } else {
1636     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1643 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1644 {
1645   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1646   PetscErrorCode ierr;
1647   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1648   int            fd;
1649   PetscScalar    *aa;
1650   FILE           *file;
1651 
1652   PetscFunctionBegin;
1653   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1654   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1655   col_lens[0] = MAT_FILE_CLASSID;
1656 
1657   col_lens[1] = A->rmap->N;
1658   col_lens[2] = A->cmap->n;
1659   col_lens[3] = a->nz*bs2;
1660 
1661   /* store lengths of each row and write (including header) to file */
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1666     }
1667   }
1668   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1669   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1670 
1671   /* store column indices (zero start index) */
1672   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1673   count = 0;
1674   for (i=0; i<a->mbs; i++) {
1675     for (j=0; j<bs; j++) {
1676       for (k=a->i[i]; k<a->i[i+1]; k++) {
1677         for (l=0; l<bs; l++) {
1678           jj[count++] = bs*a->j[k] + l;
1679         }
1680       }
1681     }
1682   }
1683   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1684   ierr = PetscFree(jj);CHKERRQ(ierr);
1685 
1686   /* store nonzero values */
1687   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1688   count = 0;
1689   for (i=0; i<a->mbs; i++) {
1690     for (j=0; j<bs; j++) {
1691       for (k=a->i[i]; k<a->i[i+1]; k++) {
1692         for (l=0; l<bs; l++) {
1693           aa[count++] = a->a[bs2*k + l*bs + j];
1694         }
1695       }
1696     }
1697   }
1698   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1699   ierr = PetscFree(aa);CHKERRQ(ierr);
1700 
1701   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1702   if (file) {
1703     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1710 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1711 {
1712   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1713   PetscErrorCode    ierr;
1714   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1715   PetscViewerFormat format;
1716 
1717   PetscFunctionBegin;
1718   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1719   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1720     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1721   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1722     Mat aij;
1723     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1724     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1725     ierr = MatDestroy(aij);CHKERRQ(ierr);
1726   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1727      PetscFunctionReturn(0);
1728   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1729     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1730     for (i=0; i<a->mbs; i++) {
1731       for (j=0; j<bs; j++) {
1732         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1733         for (k=a->i[i]; k<a->i[i+1]; k++) {
1734           for (l=0; l<bs; l++) {
1735 #if defined(PETSC_USE_COMPLEX)
1736             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1738                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1739             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1741                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1742             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1743               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1744             }
1745 #else
1746             if (a->a[bs2*k + l*bs + j] != 0.0) {
1747               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1748             }
1749 #endif
1750           }
1751         }
1752         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1753       }
1754     }
1755     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1756   } else {
1757     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1758     for (i=0; i<a->mbs; i++) {
1759       for (j=0; j<bs; j++) {
1760         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1761         for (k=a->i[i]; k<a->i[i+1]; k++) {
1762           for (l=0; l<bs; l++) {
1763 #if defined(PETSC_USE_COMPLEX)
1764             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1765               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1766                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1767             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1768               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1769                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1770             } else {
1771               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1772             }
1773 #else
1774             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1775 #endif
1776           }
1777         }
1778         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1779       }
1780     }
1781     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1782   }
1783   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1789 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1790 {
1791   Mat            A = (Mat) Aa;
1792   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1793   PetscErrorCode ierr;
1794   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1795   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1796   MatScalar      *aa;
1797   PetscViewer    viewer;
1798 
1799   PetscFunctionBegin;
1800 
1801   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1802   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1803 
1804   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1805 
1806   /* loop over matrix elements drawing boxes */
1807   color = PETSC_DRAW_BLUE;
1808   for (i=0,row=0; i<mbs; i++,row+=bs) {
1809     for (j=a->i[i]; j<a->i[i+1]; j++) {
1810       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1811       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1812       aa = a->a + j*bs2;
1813       for (k=0; k<bs; k++) {
1814         for (l=0; l<bs; l++) {
1815           if (PetscRealPart(*aa++) >=  0.) continue;
1816           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1817         }
1818       }
1819     }
1820   }
1821   color = PETSC_DRAW_CYAN;
1822   for (i=0,row=0; i<mbs; i++,row+=bs) {
1823     for (j=a->i[i]; j<a->i[i+1]; j++) {
1824       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1825       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1826       aa = a->a + j*bs2;
1827       for (k=0; k<bs; k++) {
1828         for (l=0; l<bs; l++) {
1829           if (PetscRealPart(*aa++) != 0.) continue;
1830           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1831         }
1832       }
1833     }
1834   }
1835 
1836   color = PETSC_DRAW_RED;
1837   for (i=0,row=0; i<mbs; i++,row+=bs) {
1838     for (j=a->i[i]; j<a->i[i+1]; j++) {
1839       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1840       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1841       aa = a->a + j*bs2;
1842       for (k=0; k<bs; k++) {
1843         for (l=0; l<bs; l++) {
1844           if (PetscRealPart(*aa++) <= 0.) continue;
1845           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1846         }
1847       }
1848     }
1849   }
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1855 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1856 {
1857   PetscErrorCode ierr;
1858   PetscReal      xl,yl,xr,yr,w,h;
1859   PetscDraw      draw;
1860   PetscTruth     isnull;
1861 
1862   PetscFunctionBegin;
1863 
1864   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1865   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1866 
1867   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1868   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1869   xr += w;    yr += h;  xl = -w;     yl = -h;
1870   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1871   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1872   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatView_SeqBAIJ"
1878 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1879 {
1880   PetscErrorCode ierr;
1881   PetscTruth     iascii,isbinary,isdraw;
1882 
1883   PetscFunctionBegin;
1884   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1885   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1886   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1887   if (iascii){
1888     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1889   } else if (isbinary) {
1890     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1891   } else if (isdraw) {
1892     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1893   } else {
1894     Mat B;
1895     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1896     ierr = MatView(B,viewer);CHKERRQ(ierr);
1897     ierr = MatDestroy(B);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1905 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1906 {
1907   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1908   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1909   PetscInt    *ai = a->i,*ailen = a->ilen;
1910   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1911   MatScalar   *ap,*aa = a->a;
1912 
1913   PetscFunctionBegin;
1914   for (k=0; k<m; k++) { /* loop over rows */
1915     row  = im[k]; brow = row/bs;
1916     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1917     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1918     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1919     nrow = ailen[brow];
1920     for (l=0; l<n; l++) { /* loop over columns */
1921       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1922       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1923       col  = in[l] ;
1924       bcol = col/bs;
1925       cidx = col%bs;
1926       ridx = row%bs;
1927       high = nrow;
1928       low  = 0; /* assume unsorted */
1929       while (high-low > 5) {
1930         t = (low+high)/2;
1931         if (rp[t] > bcol) high = t;
1932         else             low  = t;
1933       }
1934       for (i=low; i<high; i++) {
1935         if (rp[i] > bcol) break;
1936         if (rp[i] == bcol) {
1937           *v++ = ap[bs2*i+bs*cidx+ridx];
1938           goto finished;
1939         }
1940       }
1941       *v++ = 0.0;
1942       finished:;
1943     }
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1950 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1951 {
1952   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1953   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1954   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1955   PetscErrorCode    ierr;
1956   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1957   PetscTruth        roworiented=a->roworiented;
1958   const PetscScalar *value = v;
1959   MatScalar         *ap,*aa = a->a,*bap;
1960 
1961   PetscFunctionBegin;
1962   if (roworiented) {
1963     stepval = (n-1)*bs;
1964   } else {
1965     stepval = (m-1)*bs;
1966   }
1967   for (k=0; k<m; k++) { /* loop over added rows */
1968     row  = im[k];
1969     if (row < 0) continue;
1970 #if defined(PETSC_USE_DEBUG)
1971     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1972 #endif
1973     rp   = aj + ai[row];
1974     ap   = aa + bs2*ai[row];
1975     rmax = imax[row];
1976     nrow = ailen[row];
1977     low  = 0;
1978     high = nrow;
1979     for (l=0; l<n; l++) { /* loop over added columns */
1980       if (in[l] < 0) continue;
1981 #if defined(PETSC_USE_DEBUG)
1982       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1983 #endif
1984       col = in[l];
1985       if (roworiented) {
1986         value = v + (k*(stepval+bs) + l)*bs;
1987       } else {
1988         value = v + (l*(stepval+bs) + k)*bs;
1989       }
1990       if (col <= lastcol) low = 0; else high = nrow;
1991       lastcol = col;
1992       while (high-low > 7) {
1993         t = (low+high)/2;
1994         if (rp[t] > col) high = t;
1995         else             low  = t;
1996       }
1997       for (i=low; i<high; i++) {
1998         if (rp[i] > col) break;
1999         if (rp[i] == col) {
2000           bap  = ap +  bs2*i;
2001           if (roworiented) {
2002             if (is == ADD_VALUES) {
2003               for (ii=0; ii<bs; ii++,value+=stepval) {
2004                 for (jj=ii; jj<bs2; jj+=bs) {
2005                   bap[jj] += *value++;
2006                 }
2007               }
2008             } else {
2009               for (ii=0; ii<bs; ii++,value+=stepval) {
2010                 for (jj=ii; jj<bs2; jj+=bs) {
2011                   bap[jj] = *value++;
2012                 }
2013               }
2014             }
2015           } else {
2016             if (is == ADD_VALUES) {
2017               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2018                 for (jj=0; jj<bs; jj++) {
2019                   bap[jj] += value[jj];
2020                 }
2021                 bap += bs;
2022               }
2023             } else {
2024               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2025                 for (jj=0; jj<bs; jj++) {
2026                   bap[jj]  = value[jj];
2027                 }
2028                 bap += bs;
2029               }
2030             }
2031           }
2032           goto noinsert2;
2033         }
2034       }
2035       if (nonew == 1) goto noinsert2;
2036       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2037       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2038       N = nrow++ - 1; high++;
2039       /* shift up all the later entries in this row */
2040       for (ii=N; ii>=i; ii--) {
2041         rp[ii+1] = rp[ii];
2042         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2043       }
2044       if (N >= i) {
2045         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2046       }
2047       rp[i] = col;
2048       bap   = ap +  bs2*i;
2049       if (roworiented) {
2050         for (ii=0; ii<bs; ii++,value+=stepval) {
2051           for (jj=ii; jj<bs2; jj+=bs) {
2052             bap[jj] = *value++;
2053           }
2054         }
2055       } else {
2056         for (ii=0; ii<bs; ii++,value+=stepval) {
2057           for (jj=0; jj<bs; jj++) {
2058             *bap++  = *value++;
2059           }
2060         }
2061       }
2062       noinsert2:;
2063       low = i;
2064     }
2065     ailen[row] = nrow;
2066   }
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2072 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2073 {
2074   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2075   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2076   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2077   PetscErrorCode ierr;
2078   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2079   MatScalar      *aa = a->a,*ap;
2080   PetscReal      ratio=0.6;
2081 
2082   PetscFunctionBegin;
2083   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2084 
2085   if (m) rmax = ailen[0];
2086   for (i=1; i<mbs; i++) {
2087     /* move each row back by the amount of empty slots (fshift) before it*/
2088     fshift += imax[i-1] - ailen[i-1];
2089     rmax   = PetscMax(rmax,ailen[i]);
2090     if (fshift) {
2091       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2092       N = ailen[i];
2093       for (j=0; j<N; j++) {
2094         ip[j-fshift] = ip[j];
2095         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2096       }
2097     }
2098     ai[i] = ai[i-1] + ailen[i-1];
2099   }
2100   if (mbs) {
2101     fshift += imax[mbs-1] - ailen[mbs-1];
2102     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2103   }
2104   /* reset ilen and imax for each row */
2105   for (i=0; i<mbs; i++) {
2106     ailen[i] = imax[i] = ai[i+1] - ai[i];
2107   }
2108   a->nz = ai[mbs];
2109 
2110   /* diagonals may have moved, so kill the diagonal pointers */
2111   a->idiagvalid = PETSC_FALSE;
2112   if (fshift && a->diag) {
2113     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2114     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2115     a->diag = 0;
2116   }
2117   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2118   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2119   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2120   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2121   A->info.mallocs     += a->reallocs;
2122   a->reallocs          = 0;
2123   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2124 
2125   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
2126   if (a->compressedrow.use){
2127     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2128   }
2129 
2130   A->same_nonzero = PETSC_TRUE;
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 /*
2135    This function returns an array of flags which indicate the locations of contiguous
2136    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2137    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2138    Assume: sizes should be long enough to hold all the values.
2139 */
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2142 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2143 {
2144   PetscInt   i,j,k,row;
2145   PetscTruth flg;
2146 
2147   PetscFunctionBegin;
2148   for (i=0,j=0; i<n; j++) {
2149     row = idx[i];
2150     if (row%bs!=0) { /* Not the begining of a block */
2151       sizes[j] = 1;
2152       i++;
2153     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2154       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2155       i++;
2156     } else { /* Begining of the block, so check if the complete block exists */
2157       flg = PETSC_TRUE;
2158       for (k=1; k<bs; k++) {
2159         if (row+k != idx[i+k]) { /* break in the block */
2160           flg = PETSC_FALSE;
2161           break;
2162         }
2163       }
2164       if (flg) { /* No break in the bs */
2165         sizes[j] = bs;
2166         i+= bs;
2167       } else {
2168         sizes[j] = 1;
2169         i++;
2170       }
2171     }
2172   }
2173   *bs_max = j;
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2179 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2180 {
2181   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2182   PetscErrorCode ierr;
2183   PetscInt       i,j,k,count,*rows;
2184   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2185   PetscScalar    zero = 0.0;
2186   MatScalar      *aa;
2187 
2188   PetscFunctionBegin;
2189   /* Make a copy of the IS and  sort it */
2190   /* allocate memory for rows,sizes */
2191   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2192 
2193   /* copy IS values to rows, and sort them */
2194   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2195   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2196   if (baij->keepnonzeropattern) {
2197     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2198     bs_max = is_n;
2199     A->same_nonzero = PETSC_TRUE;
2200   } else {
2201     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2202     A->same_nonzero = PETSC_FALSE;
2203   }
2204 
2205   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2206     row   = rows[j];
2207     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2208     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2209     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2210     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2211       if (diag != 0.0) {
2212         if (baij->ilen[row/bs] > 0) {
2213           baij->ilen[row/bs]       = 1;
2214           baij->j[baij->i[row/bs]] = row/bs;
2215           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2216         }
2217         /* Now insert all the diagonal values for this bs */
2218         for (k=0; k<bs; k++) {
2219           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2220         }
2221       } else { /* (diag == 0.0) */
2222         baij->ilen[row/bs] = 0;
2223       } /* end (diag == 0.0) */
2224     } else { /* (sizes[i] != bs) */
2225 #if defined (PETSC_USE_DEBUG)
2226       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2227 #endif
2228       for (k=0; k<count; k++) {
2229         aa[0] =  zero;
2230         aa    += bs;
2231       }
2232       if (diag != 0.0) {
2233         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2234       }
2235     }
2236   }
2237 
2238   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2239   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2245 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2246 {
2247   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2248   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2249   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2250   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2251   PetscErrorCode ierr;
2252   PetscInt       ridx,cidx,bs2=a->bs2;
2253   PetscTruth     roworiented=a->roworiented;
2254   MatScalar      *ap,value,*aa=a->a,*bap;
2255 
2256   PetscFunctionBegin;
2257   if (v) PetscValidScalarPointer(v,6);
2258   for (k=0; k<m; k++) { /* loop over added rows */
2259     row  = im[k];
2260     brow = row/bs;
2261     if (row < 0) continue;
2262 #if defined(PETSC_USE_DEBUG)
2263     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2264 #endif
2265     rp   = aj + ai[brow];
2266     ap   = aa + bs2*ai[brow];
2267     rmax = imax[brow];
2268     nrow = ailen[brow];
2269     low  = 0;
2270     high = nrow;
2271     for (l=0; l<n; l++) { /* loop over added columns */
2272       if (in[l] < 0) continue;
2273 #if defined(PETSC_USE_DEBUG)
2274       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2275 #endif
2276       col = in[l]; bcol = col/bs;
2277       ridx = row % bs; cidx = col % bs;
2278       if (roworiented) {
2279         value = v[l + k*n];
2280       } else {
2281         value = v[k + l*m];
2282       }
2283       if (col <= lastcol) low = 0; else high = nrow;
2284       lastcol = col;
2285       while (high-low > 7) {
2286         t = (low+high)/2;
2287         if (rp[t] > bcol) high = t;
2288         else              low  = t;
2289       }
2290       for (i=low; i<high; i++) {
2291         if (rp[i] > bcol) break;
2292         if (rp[i] == bcol) {
2293           bap  = ap +  bs2*i + bs*cidx + ridx;
2294           if (is == ADD_VALUES) *bap += value;
2295           else                  *bap  = value;
2296           goto noinsert1;
2297         }
2298       }
2299       if (nonew == 1) goto noinsert1;
2300       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2301       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2302       N = nrow++ - 1; high++;
2303       /* shift up all the later entries in this row */
2304       for (ii=N; ii>=i; ii--) {
2305         rp[ii+1] = rp[ii];
2306         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2307       }
2308       if (N>=i) {
2309         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2310       }
2311       rp[i]                      = bcol;
2312       ap[bs2*i + bs*cidx + ridx] = value;
2313       a->nz++;
2314       noinsert1:;
2315       low = i;
2316     }
2317     ailen[brow] = nrow;
2318   }
2319   A->same_nonzero = PETSC_FALSE;
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2325 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2326 {
2327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2328   Mat            outA;
2329   PetscErrorCode ierr;
2330   PetscTruth     row_identity,col_identity;
2331 
2332   PetscFunctionBegin;
2333   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2334   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2335   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2336   if (!row_identity || !col_identity) {
2337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2338   }
2339 
2340   outA            = inA;
2341   inA->factortype = MAT_FACTOR_LU;
2342 
2343   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2344 
2345   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2346   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2347   a->row = row;
2348   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2349   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2350   a->col = col;
2351 
2352   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2353   if (a->icol) {
2354     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2355   }
2356   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2357   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2358 
2359   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2360   if (!a->solve_work) {
2361     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2362     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2363   }
2364   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2365 
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 EXTERN_C_BEGIN
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2372 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2373 {
2374   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2375   PetscInt    i,nz,mbs;
2376 
2377   PetscFunctionBegin;
2378   nz  = baij->maxnz/baij->bs2;
2379   mbs = baij->mbs;
2380   for (i=0; i<nz; i++) {
2381     baij->j[i] = indices[i];
2382   }
2383   baij->nz = nz;
2384   for (i=0; i<mbs; i++) {
2385     baij->ilen[i] = baij->imax[i];
2386   }
2387   PetscFunctionReturn(0);
2388 }
2389 EXTERN_C_END
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2393 /*@
2394     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2395        in the matrix.
2396 
2397   Input Parameters:
2398 +  mat - the SeqBAIJ matrix
2399 -  indices - the column indices
2400 
2401   Level: advanced
2402 
2403   Notes:
2404     This can be called if you have precomputed the nonzero structure of the
2405   matrix and want to provide it to the matrix object to improve the performance
2406   of the MatSetValues() operation.
2407 
2408     You MUST have set the correct numbers of nonzeros per row in the call to
2409   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2410 
2411     MUST be called before any calls to MatSetValues();
2412 
2413 @*/
2414 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2415 {
2416   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2417 
2418   PetscFunctionBegin;
2419   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2420   PetscValidPointer(indices,2);
2421   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2422   if (f) {
2423     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2424   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2430 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2431 {
2432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2433   PetscErrorCode ierr;
2434   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2435   PetscReal      atmp;
2436   PetscScalar    *x,zero = 0.0;
2437   MatScalar      *aa;
2438   PetscInt       ncols,brow,krow,kcol;
2439 
2440   PetscFunctionBegin;
2441   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2442   bs   = A->rmap->bs;
2443   aa   = a->a;
2444   ai   = a->i;
2445   aj   = a->j;
2446   mbs  = a->mbs;
2447 
2448   ierr = VecSet(v,zero);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2450   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2451   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2452   for (i=0; i<mbs; i++) {
2453     ncols = ai[1] - ai[0]; ai++;
2454     brow  = bs*i;
2455     for (j=0; j<ncols; j++){
2456       for (kcol=0; kcol<bs; kcol++){
2457         for (krow=0; krow<bs; krow++){
2458           atmp = PetscAbsScalar(*aa);aa++;
2459           row = brow + krow;    /* row index */
2460           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2461           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2462         }
2463       }
2464       aj++;
2465     }
2466   }
2467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatCopy_SeqBAIJ"
2473 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2474 {
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   /* If the two matrices have the same copy implementation, use fast copy. */
2479   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2480     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2481     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2482 
2483     if (a->i[A->rmap->N] != b->i[B->rmap->N]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2484     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2485   } else {
2486     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2487   }
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2493 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2494 {
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2504 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2505 {
2506   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2507   PetscFunctionBegin;
2508   *array = a->a;
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2514 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2515 {
2516   PetscFunctionBegin;
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2522 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2523 {
2524   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2525   PetscErrorCode ierr;
2526   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2527   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2528 
2529   PetscFunctionBegin;
2530   if (str == SAME_NONZERO_PATTERN) {
2531     PetscScalar alpha = a;
2532     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2533   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2534     if (y->xtoy && y->XtoY != X) {
2535       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2536       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2537     }
2538     if (!y->xtoy) { /* get xtoy */
2539       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2540       y->XtoY = X;
2541       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2542     }
2543     bs2 = bs*bs;
2544     for (i=0; i<x->nz; i++) {
2545       j = 0;
2546       while (j < bs2){
2547         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2548         j++;
2549       }
2550     }
2551     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2552   } else {
2553     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2554   }
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2560 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2561 {
2562   PetscInt rbs,cbs;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2567   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2568   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2569   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #undef __FUNCT__
2574 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2575 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2576 {
2577   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2578   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2579   MatScalar      *aa = a->a;
2580 
2581   PetscFunctionBegin;
2582   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2588 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2589 {
2590   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2591   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2592   MatScalar      *aa = a->a;
2593 
2594   PetscFunctionBegin;
2595   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2603 /*
2604     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2605 */
2606 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2607 {
2608   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2609   PetscErrorCode ierr;
2610   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2611   PetscInt       nz = a->i[m],row,*jj,mr,col;
2612 
2613   PetscFunctionBegin;
2614   *nn = n;
2615   if (!ia) PetscFunctionReturn(0);
2616   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2617   else {
2618     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2619     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2620     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2621     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2622     jj = a->j;
2623     for (i=0; i<nz; i++) {
2624       collengths[jj[i]]++;
2625     }
2626     cia[0] = oshift;
2627     for (i=0; i<n; i++) {
2628       cia[i+1] = cia[i] + collengths[i];
2629     }
2630     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2631     jj   = a->j;
2632     for (row=0; row<m; row++) {
2633       mr = a->i[row+1] - a->i[row];
2634       for (i=0; i<mr; i++) {
2635         col = *jj++;
2636         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2637       }
2638     }
2639     ierr = PetscFree(collengths);CHKERRQ(ierr);
2640     *ia = cia; *ja = cja;
2641   }
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 #undef __FUNCT__
2646 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2647 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2648 {
2649   PetscErrorCode ierr;
2650 
2651   PetscFunctionBegin;
2652   if (!ia) PetscFunctionReturn(0);
2653   ierr = PetscFree(*ia);CHKERRQ(ierr);
2654   ierr = PetscFree(*ja);CHKERRQ(ierr);
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 #undef __FUNCT__
2659 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2660 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2661 {
2662   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2663   PetscErrorCode ierr;
2664   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2665   PetscScalar    dx,*y,*xx,*w3_array;
2666   PetscScalar    *vscale_array;
2667   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2668   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2669   void           *fctx = coloring->fctx;
2670   PetscTruth     flg = PETSC_FALSE;
2671   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2672   Vec            x1_tmp;
2673 
2674   PetscFunctionBegin;
2675   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2676   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2677   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2678   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2679 
2680   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2681   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2682   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2683   if (flg) {
2684     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2685   } else {
2686     PetscTruth assembled;
2687     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2688     if (assembled) {
2689       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2690     }
2691   }
2692 
2693   x1_tmp = x1;
2694   if (!coloring->vscale){
2695     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2696   }
2697 
2698   /*
2699     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2700     coloring->F for the coarser grids from the finest
2701   */
2702   if (coloring->F) {
2703     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2704     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2705     if (m1 != m2) {
2706       coloring->F = 0;
2707       }
2708     }
2709 
2710   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2711     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2712   }
2713   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2714 
2715   /* Set w1 = F(x1) */
2716   if (coloring->F) {
2717     w1          = coloring->F; /* use already computed value of function */
2718     coloring->F = 0;
2719   } else {
2720     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2721     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2722     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2723   }
2724 
2725   if (!coloring->w3) {
2726     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2727     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2728   }
2729   w3 = coloring->w3;
2730 
2731     CHKMEMQ;
2732     /* Compute all the local scale factors, including ghost points */
2733   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2734   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2735   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2736   if (ctype == IS_COLORING_GHOSTED){
2737     col_start = 0; col_end = N;
2738   } else if (ctype == IS_COLORING_GLOBAL){
2739     xx = xx - start;
2740     vscale_array = vscale_array - start;
2741     col_start = start; col_end = N + start;
2742   }    CHKMEMQ;
2743   for (col=col_start; col<col_end; col++){
2744     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2745     if (coloring->htype[0] == 'w') {
2746       dx = 1.0 + unorm;
2747     } else {
2748       dx  = xx[col];
2749     }
2750     if (dx == 0.0) dx = 1.0;
2751 #if !defined(PETSC_USE_COMPLEX)
2752     if (dx < umin && dx >= 0.0)      dx = umin;
2753     else if (dx < 0.0 && dx > -umin) dx = -umin;
2754 #else
2755     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2756     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2757 #endif
2758     dx               *= epsilon;
2759     vscale_array[col] = 1.0/dx;
2760   }     CHKMEMQ;
2761   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2762   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2763   if (ctype == IS_COLORING_GLOBAL){
2764     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2765     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2766   }
2767   CHKMEMQ;
2768   if (coloring->vscaleforrow) {
2769     vscaleforrow = coloring->vscaleforrow;
2770   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2771 
2772   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2773   /*
2774     Loop over each color
2775   */
2776   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2777   for (k=0; k<coloring->ncolors; k++) {
2778     coloring->currentcolor = k;
2779     for (i=0; i<bs; i++) {
2780       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2781       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2782       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2783       /*
2784 	Loop over each column associated with color
2785 	adding the perturbation to the vector w3.
2786       */
2787       for (l=0; l<coloring->ncolumns[k]; l++) {
2788 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2789 	if (coloring->htype[0] == 'w') {
2790 	  dx = 1.0 + unorm;
2791 	} else {
2792 	  dx  = xx[col];
2793 	}
2794 	if (dx == 0.0) dx = 1.0;
2795 #if !defined(PETSC_USE_COMPLEX)
2796 	if (dx < umin && dx >= 0.0)      dx = umin;
2797 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2798 #else
2799 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2800 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2801 #endif
2802 	dx            *= epsilon;
2803 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2804 	w3_array[col] += dx;
2805       }
2806       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2807       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2808 
2809       /*
2810 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2811 	w2 = F(x1 + dx) - F(x1)
2812       */
2813       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2814       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2815       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2816       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2817 
2818       /*
2819 	Loop over rows of vector, putting results into Jacobian matrix
2820       */
2821       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2822       for (l=0; l<coloring->nrows[k]; l++) {
2823 	row    = bs*coloring->rows[k][l];             /* local row index */
2824 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2825         for (j=0; j<bs; j++) {
2826   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2827           srows[j]  = row + start + j;
2828         }
2829 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2830       }
2831       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2832     }
2833   } /* endof for each color */
2834   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2835   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2836   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2837   ierr = PetscFree(srows);CHKERRQ(ierr);
2838 
2839   coloring->currentcolor = -1;
2840   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2841   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2842   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 /* -------------------------------------------------------------------*/
2847 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2848        MatGetRow_SeqBAIJ,
2849        MatRestoreRow_SeqBAIJ,
2850        MatMult_SeqBAIJ_N,
2851 /* 4*/ MatMultAdd_SeqBAIJ_N,
2852        MatMultTranspose_SeqBAIJ,
2853        MatMultTransposeAdd_SeqBAIJ,
2854        0,
2855        0,
2856        0,
2857 /*10*/ 0,
2858        MatLUFactor_SeqBAIJ,
2859        0,
2860        0,
2861        MatTranspose_SeqBAIJ,
2862 /*15*/ MatGetInfo_SeqBAIJ,
2863        MatEqual_SeqBAIJ,
2864        MatGetDiagonal_SeqBAIJ,
2865        MatDiagonalScale_SeqBAIJ,
2866        MatNorm_SeqBAIJ,
2867 /*20*/ 0,
2868        MatAssemblyEnd_SeqBAIJ,
2869        MatSetOption_SeqBAIJ,
2870        MatZeroEntries_SeqBAIJ,
2871 /*24*/ MatZeroRows_SeqBAIJ,
2872        0,
2873        0,
2874        0,
2875        0,
2876 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2877        0,
2878        0,
2879        MatGetArray_SeqBAIJ,
2880        MatRestoreArray_SeqBAIJ,
2881 /*34*/ MatDuplicate_SeqBAIJ,
2882        0,
2883        0,
2884        MatILUFactor_SeqBAIJ,
2885        0,
2886 /*39*/ MatAXPY_SeqBAIJ,
2887        MatGetSubMatrices_SeqBAIJ,
2888        MatIncreaseOverlap_SeqBAIJ,
2889        MatGetValues_SeqBAIJ,
2890        MatCopy_SeqBAIJ,
2891 /*44*/ 0,
2892        MatScale_SeqBAIJ,
2893        0,
2894        0,
2895        0,
2896 /*49*/ MatSetBlockSize_SeqBAIJ,
2897        MatGetRowIJ_SeqBAIJ,
2898        MatRestoreRowIJ_SeqBAIJ,
2899        MatGetColumnIJ_SeqBAIJ,
2900        MatRestoreColumnIJ_SeqBAIJ,
2901 /*54*/ MatFDColoringCreate_SeqAIJ,
2902        0,
2903        0,
2904        0,
2905        MatSetValuesBlocked_SeqBAIJ,
2906 /*59*/ MatGetSubMatrix_SeqBAIJ,
2907        MatDestroy_SeqBAIJ,
2908        MatView_SeqBAIJ,
2909        0,
2910        0,
2911 /*64*/ 0,
2912        0,
2913        0,
2914        0,
2915        0,
2916 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2917        0,
2918        MatConvert_Basic,
2919        0,
2920        0,
2921 /*74*/ 0,
2922        MatFDColoringApply_BAIJ,
2923        0,
2924        0,
2925        0,
2926 /*79*/ 0,
2927        0,
2928        0,
2929        0,
2930        MatLoad_SeqBAIJ,
2931 /*84*/ 0,
2932        0,
2933        0,
2934        0,
2935        0,
2936 /*89*/ 0,
2937        0,
2938        0,
2939        0,
2940        0,
2941 /*94*/ 0,
2942        0,
2943        0,
2944        0,
2945        0,
2946 /*99*/0,
2947        0,
2948        0,
2949        0,
2950        0,
2951 /*104*/0,
2952        MatRealPart_SeqBAIJ,
2953        MatImaginaryPart_SeqBAIJ,
2954        0,
2955        0,
2956 /*109*/0,
2957        0,
2958        0,
2959        0,
2960        MatMissingDiagonal_SeqBAIJ,
2961 /*114*/0,
2962        0,
2963        0,
2964        0,
2965        0,
2966 /*119*/0,
2967        0,
2968        MatMultHermitianTranspose_SeqBAIJ,
2969        MatMultHermitianTransposeAdd_SeqBAIJ,
2970        0,
2971 };
2972 
2973 EXTERN_C_BEGIN
2974 #undef __FUNCT__
2975 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2976 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2977 {
2978   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2979   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2984 
2985   /* allocate space for values if not already there */
2986   if (!aij->saved_values) {
2987     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2988     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2989   }
2990 
2991   /* copy values over */
2992   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2993   PetscFunctionReturn(0);
2994 }
2995 EXTERN_C_END
2996 
2997 EXTERN_C_BEGIN
2998 #undef __FUNCT__
2999 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3000 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
3001 {
3002   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3003   PetscErrorCode ierr;
3004   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3005 
3006   PetscFunctionBegin;
3007   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3008   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3009 
3010   /* copy values over */
3011   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3012   PetscFunctionReturn(0);
3013 }
3014 EXTERN_C_END
3015 
3016 EXTERN_C_BEGIN
3017 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3018 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3019 EXTERN_C_END
3020 
3021 EXTERN_C_BEGIN
3022 #undef __FUNCT__
3023 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3024 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3025 {
3026   Mat_SeqBAIJ    *b;
3027   PetscErrorCode ierr;
3028   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3029   PetscTruth     flg,skipallocation = PETSC_FALSE;
3030 
3031   PetscFunctionBegin;
3032 
3033   if (nz == MAT_SKIP_ALLOCATION) {
3034     skipallocation = PETSC_TRUE;
3035     nz             = 0;
3036   }
3037 
3038   if (bs < 0) {
3039     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3040       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3041     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3042     bs   = PetscAbs(bs);
3043   }
3044   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3045   bs   = newbs;
3046 
3047   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3048   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3049   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3050   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3051 
3052   B->preallocated = PETSC_TRUE;
3053 
3054   mbs  = B->rmap->n/bs;
3055   nbs  = B->cmap->n/bs;
3056   bs2  = bs*bs;
3057 
3058   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3059 
3060   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3061   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3062   if (nnz) {
3063     for (i=0; i<mbs; i++) {
3064       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3065       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3066     }
3067   }
3068 
3069   b       = (Mat_SeqBAIJ*)B->data;
3070   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3071     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3072   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3073 
3074   if (!flg) {
3075     switch (bs) {
3076     case 1:
3077       B->ops->mult            = MatMult_SeqBAIJ_1;
3078       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3079       B->ops->sor             = MatSOR_SeqBAIJ_1;
3080       break;
3081     case 2:
3082       B->ops->mult            = MatMult_SeqBAIJ_2;
3083       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3084       B->ops->sor             = MatSOR_SeqBAIJ_2;
3085       break;
3086     case 3:
3087       B->ops->mult            = MatMult_SeqBAIJ_3;
3088       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3089       B->ops->sor             = MatSOR_SeqBAIJ_3;
3090       break;
3091     case 4:
3092       B->ops->mult            = MatMult_SeqBAIJ_4;
3093       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3094       B->ops->sor             = MatSOR_SeqBAIJ_4;
3095       break;
3096     case 5:
3097       B->ops->mult            = MatMult_SeqBAIJ_5;
3098       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3099       B->ops->sor             = MatSOR_SeqBAIJ_5;
3100       break;
3101     case 6:
3102       B->ops->mult            = MatMult_SeqBAIJ_6;
3103       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3104       B->ops->sor             = MatSOR_SeqBAIJ_6;
3105       break;
3106     case 7:
3107       B->ops->mult            = MatMult_SeqBAIJ_7;
3108       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3109       B->ops->sor             = MatSOR_SeqBAIJ_7;
3110       break;
3111     case 15:
3112       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3113       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3114       B->ops->sor             = MatSOR_SeqBAIJ_N;
3115       break;
3116     default:
3117       B->ops->mult            = MatMult_SeqBAIJ_N;
3118       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3119       B->ops->sor             = MatSOR_SeqBAIJ_N;
3120       break;
3121     }
3122   }
3123   B->rmap->bs  = bs;
3124   b->mbs       = mbs;
3125   b->nbs       = nbs;
3126   if (!skipallocation) {
3127     if (!b->imax) {
3128       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3129       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3130       b->free_imax_ilen = PETSC_TRUE;
3131     }
3132     /* b->ilen will count nonzeros in each block row so far. */
3133     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3134     if (!nnz) {
3135       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3136       else if (nz <= 0)        nz = 1;
3137       for (i=0; i<mbs; i++) b->imax[i] = nz;
3138       nz = nz*mbs;
3139     } else {
3140       nz = 0;
3141       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3142     }
3143 
3144     /* allocate the matrix space */
3145     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3146     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3147     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3148     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3149     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3150     b->singlemalloc = PETSC_TRUE;
3151     b->i[0] = 0;
3152     for (i=1; i<mbs+1; i++) {
3153       b->i[i] = b->i[i-1] + b->imax[i-1];
3154     }
3155     b->free_a     = PETSC_TRUE;
3156     b->free_ij    = PETSC_TRUE;
3157   } else {
3158     b->free_a     = PETSC_FALSE;
3159     b->free_ij    = PETSC_FALSE;
3160   }
3161 
3162   B->rmap->bs          = bs;
3163   b->bs2              = bs2;
3164   b->mbs              = mbs;
3165   b->nz               = 0;
3166   b->maxnz            = nz*bs2;
3167   B->info.nz_unneeded = (PetscReal)b->maxnz;
3168   PetscFunctionReturn(0);
3169 }
3170 EXTERN_C_END
3171 
3172 EXTERN_C_BEGIN
3173 #undef __FUNCT__
3174 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3175 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3176 {
3177   PetscInt       i,m,nz,nz_max=0,*nnz;
3178   PetscScalar    *values=0;
3179   PetscErrorCode ierr;
3180 
3181   PetscFunctionBegin;
3182   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3183   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3184   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3185   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3186   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3187   m = B->rmap->n/bs;
3188 
3189   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3190   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3191   for(i=0; i<m; i++) {
3192     nz = ii[i+1]- ii[i];
3193     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3194     nz_max = PetscMax(nz_max, nz);
3195     nnz[i] = nz;
3196   }
3197   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3198   ierr = PetscFree(nnz);CHKERRQ(ierr);
3199 
3200   values = (PetscScalar*)V;
3201   if (!values) {
3202     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3203     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3204   }
3205   for (i=0; i<m; i++) {
3206     PetscInt          ncols  = ii[i+1] - ii[i];
3207     const PetscInt    *icols = jj + ii[i];
3208     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3209     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3210   }
3211   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3212   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3213   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3214 
3215   PetscFunctionReturn(0);
3216 }
3217 EXTERN_C_END
3218 
3219 
3220 EXTERN_C_BEGIN
3221 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3222 #if defined(PETSC_HAVE_MUMPS)
3223 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3224 #endif
3225 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3226 EXTERN_C_END
3227 
3228 /*MC
3229    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3230    block sparse compressed row format.
3231 
3232    Options Database Keys:
3233 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3234 
3235   Level: beginner
3236 
3237 .seealso: MatCreateSeqBAIJ()
3238 M*/
3239 
3240 
3241 EXTERN_C_BEGIN
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatCreate_SeqBAIJ"
3244 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3245 {
3246   PetscErrorCode ierr;
3247   PetscMPIInt    size;
3248   Mat_SeqBAIJ    *b;
3249 
3250   PetscFunctionBegin;
3251   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3252   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3253 
3254   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3255   B->data = (void*)b;
3256   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3257   B->mapping               = 0;
3258   b->row                   = 0;
3259   b->col                   = 0;
3260   b->icol                  = 0;
3261   b->reallocs              = 0;
3262   b->saved_values          = 0;
3263 
3264   b->roworiented           = PETSC_TRUE;
3265   b->nonew                 = 0;
3266   b->diag                  = 0;
3267   b->solve_work            = 0;
3268   b->mult_work             = 0;
3269   B->spptr                 = 0;
3270   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3271   b->keepnonzeropattern    = PETSC_FALSE;
3272   b->xtoy                  = 0;
3273   b->XtoY                  = 0;
3274   b->compressedrow.use     = PETSC_FALSE;
3275   b->compressedrow.nrows   = 0;
3276   b->compressedrow.i       = PETSC_NULL;
3277   b->compressedrow.rindex  = PETSC_NULL;
3278   b->compressedrow.checked = PETSC_FALSE;
3279   B->same_nonzero          = PETSC_FALSE;
3280 
3281   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3282                                      "MatGetFactorAvailable_seqbaij_petsc",
3283                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3284   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3285                                      "MatGetFactor_seqbaij_petsc",
3286                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3287 #if defined(PETSC_HAVE_MUMPS)
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3289 #endif
3290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3291                                      "MatInvertBlockDiagonal_SeqBAIJ",
3292                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3293   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3294                                      "MatStoreValues_SeqBAIJ",
3295                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3296   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3297                                      "MatRetrieveValues_SeqBAIJ",
3298                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3299   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3300                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3301                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3302   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3303                                      "MatConvert_SeqBAIJ_SeqAIJ",
3304                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3305   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3306                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3307                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3308   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3309                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3310                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3311   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3312                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3313                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3314   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3315   PetscFunctionReturn(0);
3316 }
3317 EXTERN_C_END
3318 
3319 #undef __FUNCT__
3320 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3321 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3322 {
3323   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3324   PetscErrorCode ierr;
3325   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3326 
3327   PetscFunctionBegin;
3328   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3329 
3330   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3331     c->imax = a->imax;
3332     c->ilen = a->ilen;
3333     c->free_imax_ilen = PETSC_FALSE;
3334   } else {
3335     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3336     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3337     for (i=0; i<mbs; i++) {
3338       c->imax[i] = a->imax[i];
3339       c->ilen[i] = a->ilen[i];
3340     }
3341     c->free_imax_ilen = PETSC_TRUE;
3342   }
3343 
3344   /* allocate the matrix space */
3345   if (mallocmatspace){
3346     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3347       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3348       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3349       c->singlemalloc = PETSC_FALSE;
3350       c->free_ij      = PETSC_FALSE;
3351       c->i            = a->i;
3352       c->j            = a->j;
3353       c->parent       = A;
3354       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3355       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3356       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3357     } else {
3358       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3359       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3360       c->singlemalloc = PETSC_TRUE;
3361       c->free_ij      = PETSC_TRUE;
3362       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3363       if (mbs > 0) {
3364 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3365 	if (cpvalues == MAT_COPY_VALUES) {
3366 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3367 	} else {
3368 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3369 	}
3370       }
3371     }
3372   }
3373 
3374   c->roworiented = a->roworiented;
3375   c->nonew       = a->nonew;
3376   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3377   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3378   c->bs2         = a->bs2;
3379   c->mbs         = a->mbs;
3380   c->nbs         = a->nbs;
3381 
3382   if (a->diag) {
3383     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3384       c->diag      = a->diag;
3385       c->free_diag = PETSC_FALSE;
3386     } else {
3387       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3388       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3389       for (i=0; i<mbs; i++) {
3390         c->diag[i] = a->diag[i];
3391       }
3392       c->free_diag = PETSC_TRUE;
3393     }
3394   } else c->diag        = 0;
3395   c->nz                 = a->nz;
3396   c->maxnz              = bs2*a->nz; /* Since we allocate exactly the right amount */
3397   c->solve_work         = 0;
3398   c->mult_work          = 0;
3399   c->free_a             = PETSC_TRUE;
3400   c->free_ij            = PETSC_TRUE;
3401   C->preallocated       = PETSC_TRUE;
3402   C->assembled          = PETSC_TRUE;
3403 
3404   c->compressedrow.use     = a->compressedrow.use;
3405   c->compressedrow.nrows   = a->compressedrow.nrows;
3406   c->compressedrow.checked = a->compressedrow.checked;
3407   if (a->compressedrow.checked && a->compressedrow.use){
3408     i = a->compressedrow.nrows;
3409     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3410     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3411     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3412     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3413   } else {
3414     c->compressedrow.use    = PETSC_FALSE;
3415     c->compressedrow.i      = PETSC_NULL;
3416     c->compressedrow.rindex = PETSC_NULL;
3417   }
3418   C->same_nonzero = A->same_nonzero;
3419   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3420   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3426 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3427 {
3428     PetscErrorCode ierr;
3429 
3430   PetscFunctionBegin;
3431   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3432   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3433   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3434   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 #undef __FUNCT__
3439 #define __FUNCT__ "MatLoad_SeqBAIJ"
3440 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3441 {
3442   Mat_SeqBAIJ    *a;
3443   PetscErrorCode ierr;
3444   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3445   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3446   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3447   PetscInt       *masked,nmask,tmp,bs2,ishift;
3448   PetscMPIInt    size;
3449   int            fd;
3450   PetscScalar    *aa;
3451   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3452 
3453   PetscFunctionBegin;
3454   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3455   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3456   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3457   bs2  = bs*bs;
3458 
3459   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3460   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3461   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3462   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3463   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3464   M = header[1]; N = header[2]; nz = header[3];
3465 
3466   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3467   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3468 
3469   /*
3470      This code adds extra rows to make sure the number of rows is
3471     divisible by the blocksize
3472   */
3473   mbs        = M/bs;
3474   extra_rows = bs - M + bs*(mbs);
3475   if (extra_rows == bs) extra_rows = 0;
3476   else                  mbs++;
3477   if (extra_rows) {
3478     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3479   }
3480 
3481   /* Set global sizes if not already set */
3482   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3483     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3484   } else { /* Check if the matrix global sizes are correct */
3485     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3486     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3487   }
3488 
3489   /* read in row lengths */
3490   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3491   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3492   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3493 
3494   /* read in column indices */
3495   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3496   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3497   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3498 
3499   /* loop over row lengths determining block row lengths */
3500   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3501   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3502   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3503   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3504   rowcount = 0;
3505   nzcount = 0;
3506   for (i=0; i<mbs; i++) {
3507     nmask = 0;
3508     for (j=0; j<bs; j++) {
3509       kmax = rowlengths[rowcount];
3510       for (k=0; k<kmax; k++) {
3511         tmp = jj[nzcount++]/bs;
3512         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3513       }
3514       rowcount++;
3515     }
3516     browlengths[i] += nmask;
3517     /* zero out the mask elements we set */
3518     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3519   }
3520 
3521   /* Do preallocation  */
3522   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3523   a = (Mat_SeqBAIJ*)newmat->data;
3524 
3525   /* set matrix "i" values */
3526   a->i[0] = 0;
3527   for (i=1; i<= mbs; i++) {
3528     a->i[i]      = a->i[i-1] + browlengths[i-1];
3529     a->ilen[i-1] = browlengths[i-1];
3530   }
3531   a->nz         = 0;
3532   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3533 
3534   /* read in nonzero values */
3535   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3536   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3537   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3538 
3539   /* set "a" and "j" values into matrix */
3540   nzcount = 0; jcount = 0;
3541   for (i=0; i<mbs; i++) {
3542     nzcountb = nzcount;
3543     nmask    = 0;
3544     for (j=0; j<bs; j++) {
3545       kmax = rowlengths[i*bs+j];
3546       for (k=0; k<kmax; k++) {
3547         tmp = jj[nzcount++]/bs;
3548 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3549       }
3550     }
3551     /* sort the masked values */
3552     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3553 
3554     /* set "j" values into matrix */
3555     maskcount = 1;
3556     for (j=0; j<nmask; j++) {
3557       a->j[jcount++]  = masked[j];
3558       mask[masked[j]] = maskcount++;
3559     }
3560     /* set "a" values into matrix */
3561     ishift = bs2*a->i[i];
3562     for (j=0; j<bs; j++) {
3563       kmax = rowlengths[i*bs+j];
3564       for (k=0; k<kmax; k++) {
3565         tmp       = jj[nzcountb]/bs ;
3566         block     = mask[tmp] - 1;
3567         point     = jj[nzcountb] - bs*tmp;
3568         idx       = ishift + bs2*block + j + bs*point;
3569         a->a[idx] = (MatScalar)aa[nzcountb++];
3570       }
3571     }
3572     /* zero out the mask elements we set */
3573     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3574   }
3575   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3576 
3577   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3578   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3579   ierr = PetscFree(aa);CHKERRQ(ierr);
3580   ierr = PetscFree(jj);CHKERRQ(ierr);
3581   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3582 
3583   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3584   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3585   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3586   PetscFunctionReturn(0);
3587 }
3588 
3589 #undef __FUNCT__
3590 #define __FUNCT__ "MatCreateSeqBAIJ"
3591 /*@C
3592    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3593    compressed row) format.  For good matrix assembly performance the
3594    user should preallocate the matrix storage by setting the parameter nz
3595    (or the array nnz).  By setting these parameters accurately, performance
3596    during matrix assembly can be increased by more than a factor of 50.
3597 
3598    Collective on MPI_Comm
3599 
3600    Input Parameters:
3601 +  comm - MPI communicator, set to PETSC_COMM_SELF
3602 .  bs - size of block
3603 .  m - number of rows
3604 .  n - number of columns
3605 .  nz - number of nonzero blocks  per block row (same for all rows)
3606 -  nnz - array containing the number of nonzero blocks in the various block rows
3607          (possibly different for each block row) or PETSC_NULL
3608 
3609    Output Parameter:
3610 .  A - the matrix
3611 
3612    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3613    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3614    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3615 
3616    Options Database Keys:
3617 .   -mat_no_unroll - uses code that does not unroll the loops in the
3618                      block calculations (much slower)
3619 .    -mat_block_size - size of the blocks to use
3620 
3621    Level: intermediate
3622 
3623    Notes:
3624    The number of rows and columns must be divisible by blocksize.
3625 
3626    If the nnz parameter is given then the nz parameter is ignored
3627 
3628    A nonzero block is any block that as 1 or more nonzeros in it
3629 
3630    The block AIJ format is fully compatible with standard Fortran 77
3631    storage.  That is, the stored row and column indices can begin at
3632    either one (as in Fortran) or zero.  See the users' manual for details.
3633 
3634    Specify the preallocated storage with either nz or nnz (not both).
3635    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3636    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3637    matrices.
3638 
3639 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3640 @*/
3641 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3642 {
3643   PetscErrorCode ierr;
3644 
3645   PetscFunctionBegin;
3646   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3647   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3648   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3649   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3650   PetscFunctionReturn(0);
3651 }
3652 
3653 #undef __FUNCT__
3654 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3655 /*@C
3656    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3657    per row in the matrix. For good matrix assembly performance the
3658    user should preallocate the matrix storage by setting the parameter nz
3659    (or the array nnz).  By setting these parameters accurately, performance
3660    during matrix assembly can be increased by more than a factor of 50.
3661 
3662    Collective on MPI_Comm
3663 
3664    Input Parameters:
3665 +  A - the matrix
3666 .  bs - size of block
3667 .  nz - number of block nonzeros per block row (same for all rows)
3668 -  nnz - array containing the number of block nonzeros in the various block rows
3669          (possibly different for each block row) or PETSC_NULL
3670 
3671    Options Database Keys:
3672 .   -mat_no_unroll - uses code that does not unroll the loops in the
3673                      block calculations (much slower)
3674 .    -mat_block_size - size of the blocks to use
3675 
3676    Level: intermediate
3677 
3678    Notes:
3679    If the nnz parameter is given then the nz parameter is ignored
3680 
3681    You can call MatGetInfo() to get information on how effective the preallocation was;
3682    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3683    You can also run with the option -info and look for messages with the string
3684    malloc in them to see if additional memory allocation was needed.
3685 
3686    The block AIJ format is fully compatible with standard Fortran 77
3687    storage.  That is, the stored row and column indices can begin at
3688    either one (as in Fortran) or zero.  See the users' manual for details.
3689 
3690    Specify the preallocated storage with either nz or nnz (not both).
3691    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3692    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3693 
3694 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3695 @*/
3696 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3697 {
3698   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3699 
3700   PetscFunctionBegin;
3701   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3702   if (f) {
3703     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3704   }
3705   PetscFunctionReturn(0);
3706 }
3707 
3708 #undef __FUNCT__
3709 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3710 /*@C
3711    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3712    (the default sequential PETSc format).
3713 
3714    Collective on MPI_Comm
3715 
3716    Input Parameters:
3717 +  A - the matrix
3718 .  i - the indices into j for the start of each local row (starts with zero)
3719 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3720 -  v - optional values in the matrix
3721 
3722    Level: developer
3723 
3724 .keywords: matrix, aij, compressed row, sparse
3725 
3726 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3727 @*/
3728 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3729 {
3730   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3731 
3732   PetscFunctionBegin;
3733   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3734   if (f) {
3735     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3736   }
3737   PetscFunctionReturn(0);
3738 }
3739 
3740 
3741 #undef __FUNCT__
3742 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3743 /*@
3744      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3745               (upper triangular entries in CSR format) provided by the user.
3746 
3747      Collective on MPI_Comm
3748 
3749    Input Parameters:
3750 +  comm - must be an MPI communicator of size 1
3751 .  bs - size of block
3752 .  m - number of rows
3753 .  n - number of columns
3754 .  i - row indices
3755 .  j - column indices
3756 -  a - matrix values
3757 
3758    Output Parameter:
3759 .  mat - the matrix
3760 
3761    Level: intermediate
3762 
3763    Notes:
3764        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3765     once the matrix is destroyed
3766 
3767        You cannot set new nonzero locations into this matrix, that will generate an error.
3768 
3769        The i and j indices are 0 based
3770 
3771 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3772 
3773 @*/
3774 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3775 {
3776   PetscErrorCode ierr;
3777   PetscInt       ii;
3778   Mat_SeqBAIJ    *baij;
3779 
3780   PetscFunctionBegin;
3781   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3782   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3783 
3784   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3785   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3786   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3787   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3788   baij = (Mat_SeqBAIJ*)(*mat)->data;
3789   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3790   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3791 
3792   baij->i = i;
3793   baij->j = j;
3794   baij->a = a;
3795   baij->singlemalloc = PETSC_FALSE;
3796   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3797   baij->free_a       = PETSC_FALSE;
3798   baij->free_ij       = PETSC_FALSE;
3799 
3800   for (ii=0; ii<m; ii++) {
3801     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3802 #if defined(PETSC_USE_DEBUG)
3803     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3804 #endif
3805   }
3806 #if defined(PETSC_USE_DEBUG)
3807   for (ii=0; ii<baij->i[m]; ii++) {
3808     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3809     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3810   }
3811 #endif
3812 
3813   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3814   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3815   PetscFunctionReturn(0);
3816 }
3817