xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8bc8193efbc389280f83b3d41dffa9e2d23e2ace)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
18   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
19 
20   PetscFunctionBegin;
21   if (a->idiagvalid) PetscFunctionReturn(0);
22   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
23   diag_offset = a->diag;
24   if (!a->idiag) {
25     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
26   }
27   diag  = a->idiag;
28   mdiag = a->idiag+bs*bs*mbs;
29   /* factor and invert each block */
30   switch (bs){
31     case 2:
32       for (i=0; i<mbs; i++) {
33         odiag   = v + 4*diag_offset[i];
34         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
35 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
36 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
37 	diag    += 4;
38 	mdiag   += 4;
39       }
40       break;
41     case 3:
42       for (i=0; i<mbs; i++) {
43         odiag    = v + 9*diag_offset[i];
44         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
45         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
46         diag[8]  = odiag[8];
47         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
48         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
49         mdiag[8] = odiag[8];
50 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
51 	diag    += 9;
52 	mdiag   += 9;
53       }
54       break;
55     case 4:
56       for (i=0; i<mbs; i++) {
57         odiag  = v + 16*diag_offset[i];
58         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
61 	diag  += 16;
62 	mdiag += 16;
63       }
64       break;
65     case 5:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 25*diag_offset[i];
68         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
71 	diag  += 25;
72 	mdiag += 25;
73       }
74       break;
75     default:
76       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
77   }
78   a->idiagvalid = PETSC_TRUE;
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
85 {
86   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
87   PetscScalar        *x,x1,x2,s1,s2;
88   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
89   PetscErrorCode     ierr;
90   PetscInt           m = a->mbs,i,i2,nz,idx;
91   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       PetscLogFlops(4*(a->nz));
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       PetscLogFlops(6*m);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       PetscLogFlops(4*(a->nz));
175     }
176   } else {
177     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   PetscErrorCode     ierr;
192   PetscInt           m = a->mbs,i,i2,nz,idx;
193   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
194 
195   PetscFunctionBegin;
196   its = its*lits;
197   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
198   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
202 
203   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
204 
205   diag  = a->diag;
206   idiag = a->idiag;
207   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209 
210   if (flag & SOR_ZERO_INITIAL_GUESS) {
211     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215       i2     = 3;
216       idiag += 9;
217       for (i=1; i<m; i++) {
218 	v     = aa + 9*ai[i];
219 	vi    = aj + ai[i];
220 	nz    = diag[i] - ai[i];
221 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222 	while (nz--) {
223 	  idx  = 3*(*vi++);
224 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228 	  v   += 9;
229 	}
230 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233         idiag   += 9;
234         i2      += 3;
235       }
236       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237       PetscLogFlops(9*(a->nz));
238     }
239     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241       i2    = 0;
242       mdiag = a->idiag+9*a->mbs;
243       for (i=0; i<m; i++) {
244         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248         mdiag  += 9;
249         i2     += 3;
250       }
251       PetscLogFlops(15*m);
252     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
254     }
255     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256       idiag   = a->idiag+9*a->mbs - 9;
257       i2      = 3*m - 3;
258       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262       idiag -= 9;
263       i2    -= 3;
264       for (i=m-2; i>=0; i--) {
265 	v     = aa + 9*(diag[i]+1);
266 	vi    = aj + diag[i] + 1;
267 	nz    = ai[i+1] - diag[i] - 1;
268 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269 	while (nz--) {
270 	  idx  = 3*(*vi++);
271 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275 	  v   += 9;
276 	}
277 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280         idiag   -= 9;
281         i2      -= 3;
282       }
283       PetscLogFlops(9*(a->nz));
284     }
285   } else {
286     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287   }
288   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
289   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
296 {
297   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
298   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
299   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
300   PetscErrorCode     ierr;
301   PetscInt           m = a->mbs,i,i2,nz,idx;
302   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
303 
304   PetscFunctionBegin;
305   its = its*lits;
306   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
307   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
311 
312   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
313 
314   diag  = a->diag;
315   idiag = a->idiag;
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
318 
319   if (flag & SOR_ZERO_INITIAL_GUESS) {
320     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
322       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
323       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325       i2     = 4;
326       idiag += 16;
327       for (i=1; i<m; i++) {
328 	v     = aa + 16*ai[i];
329 	vi    = aj + ai[i];
330 	nz    = diag[i] - ai[i];
331 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332 	while (nz--) {
333 	  idx  = 4*(*vi++);
334 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
336 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
337 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339 	  v   += 16;
340 	}
341 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
342 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
343 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345         idiag   += 16;
346         i2      += 4;
347       }
348       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349       PetscLogFlops(16*(a->nz));
350     }
351     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353       i2    = 0;
354       mdiag = a->idiag+16*a->mbs;
355       for (i=0; i<m; i++) {
356         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
358         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
359         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361         mdiag  += 16;
362         i2     += 4;
363       }
364       PetscLogFlops(28*m);
365     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
367     }
368     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369       idiag   = a->idiag+16*a->mbs - 16;
370       i2      = 4*m - 4;
371       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
373       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
374       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376       idiag -= 16;
377       i2    -= 4;
378       for (i=m-2; i>=0; i--) {
379 	v     = aa + 16*(diag[i]+1);
380 	vi    = aj + diag[i] + 1;
381 	nz    = ai[i+1] - diag[i] - 1;
382 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383 	while (nz--) {
384 	  idx  = 4*(*vi++);
385 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
387 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
388 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390 	  v   += 16;
391 	}
392 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
393 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
394 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396         idiag   -= 16;
397         i2      -= 4;
398       }
399       PetscLogFlops(16*(a->nz));
400     }
401   } else {
402     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403   }
404   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
412 {
413   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
414   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
416   PetscErrorCode     ierr;
417   PetscInt           m = a->mbs,i,i2,nz,idx;
418   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
419 
420   PetscFunctionBegin;
421   its = its*lits;
422   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
423   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
427 
428   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
429 
430   diag  = a->diag;
431   idiag = a->idiag;
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
434 
435   if (flag & SOR_ZERO_INITIAL_GUESS) {
436     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442       i2     = 5;
443       idiag += 25;
444       for (i=1; i<m; i++) {
445 	v     = aa + 25*ai[i];
446 	vi    = aj + ai[i];
447 	nz    = diag[i] - ai[i];
448 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449 	while (nz--) {
450 	  idx  = 5*(*vi++);
451 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457 	  v   += 25;
458 	}
459 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464         idiag   += 25;
465         i2      += 5;
466       }
467       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468       PetscLogFlops(25*(a->nz));
469     }
470     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472       i2    = 0;
473       mdiag = a->idiag+25*a->mbs;
474       for (i=0; i<m; i++) {
475         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481         mdiag  += 25;
482         i2     += 5;
483       }
484       PetscLogFlops(45*m);
485     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
487     }
488     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489       idiag   = a->idiag+25*a->mbs - 25;
490       i2      = 5*m - 5;
491       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497       idiag -= 25;
498       i2    -= 5;
499       for (i=m-2; i>=0; i--) {
500 	v     = aa + 25*(diag[i]+1);
501 	vi    = aj + diag[i] + 1;
502 	nz    = ai[i+1] - diag[i] - 1;
503 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504 	while (nz--) {
505 	  idx  = 5*(*vi++);
506 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512 	  v   += 25;
513 	}
514 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519         idiag   -= 25;
520         i2      -= 5;
521       }
522       PetscLogFlops(25*(a->nz));
523     }
524   } else {
525     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526   }
527   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
528   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533     Special version for Fun3d sequential benchmark
534 */
535 #if defined(PETSC_HAVE_FORTRAN_CAPS)
536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538 #define matsetvaluesblocked4_ matsetvaluesblocked4
539 #endif
540 
541 EXTERN_C_BEGIN
542 #undef __FUNCT__
543 #define __FUNCT__ "matsetvaluesblocked4_"
544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
545 {
546   Mat               A = *AA;
547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549   PetscInt          *ai=a->i,*ailen=a->ilen;
550   PetscInt          *aj=a->j,stepval;
551   const PetscScalar *value = v;
552   MatScalar         *ap,*aa = a->a,*bap;
553 
554   PetscFunctionBegin;
555   stepval = (n-1)*4;
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     rp   = aj + ai[row];
559     ap   = aa + 16*ai[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       col = in[l];
564       value = v + k*(stepval+4)*4 + l*4;
565       low = 0; high = nrow;
566       while (high-low > 7) {
567         t = (low+high)/2;
568         if (rp[t] > col) high = t;
569         else             low  = t;
570       }
571       for (i=low; i<high; i++) {
572         if (rp[i] > col) break;
573         if (rp[i] == col) {
574           bap  = ap +  16*i;
575           for (ii=0; ii<4; ii++,value+=stepval) {
576             for (jj=ii; jj<16; jj+=4) {
577               bap[jj] += *value++;
578             }
579           }
580           goto noinsert2;
581         }
582       }
583       N = nrow++ - 1;
584       /* shift up all the later entries in this row */
585       for (ii=N; ii>=i; ii--) {
586         rp[ii+1] = rp[ii];
587         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588       }
589       if (N >= i) {
590         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591       }
592       rp[i] = col;
593       bap   = ap +  16*i;
594       for (ii=0; ii<4; ii++,value+=stepval) {
595         for (jj=ii; jj<16; jj+=4) {
596           bap[jj] = *value++;
597         }
598       }
599       noinsert2:;
600       low = i;
601     }
602     ailen[row] = nrow;
603   }
604 }
605 EXTERN_C_END
606 
607 #if defined(PETSC_HAVE_FORTRAN_CAPS)
608 #define matsetvalues4_ MATSETVALUES4
609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610 #define matsetvalues4_ matsetvalues4
611 #endif
612 
613 EXTERN_C_BEGIN
614 #undef __FUNCT__
615 #define __FUNCT__ "MatSetValues4_"
616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
617 {
618   Mat         A = *AA;
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621   PetscInt    *ai=a->i,*ailen=a->ilen;
622   PetscInt    *aj=a->j,brow,bcol;
623   PetscInt    ridx,cidx;
624   MatScalar   *ap,value,*aa=a->a,*bap;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over added rows */
628     row  = im[k]; brow = row/4;
629     rp   = aj + ai[brow];
630     ap   = aa + 16*ai[brow];
631     nrow = ailen[brow];
632     low  = 0;
633     for (l=0; l<n; l++) { /* loop over added columns */
634       col = in[l]; bcol = col/4;
635       ridx = row % 4; cidx = col % 4;
636       value = v[l + k*n];
637       low = 0; high = nrow;
638       while (high-low > 7) {
639         t = (low+high)/2;
640         if (rp[t] > bcol) high = t;
641         else              low  = t;
642       }
643       for (i=low; i<high; i++) {
644         if (rp[i] > bcol) break;
645         if (rp[i] == bcol) {
646           bap  = ap +  16*i + 4*cidx + ridx;
647           *bap += value;
648           goto noinsert1;
649         }
650       }
651       N = nrow++ - 1;
652       /* shift up all the later entries in this row */
653       for (ii=N; ii>=i; ii--) {
654         rp[ii+1] = rp[ii];
655         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656       }
657       if (N>=i) {
658         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659       }
660       rp[i]                    = bcol;
661       ap[16*i + 4*cidx + ridx] = value;
662       noinsert1:;
663       low = i;
664     }
665     ailen[brow] = nrow;
666   }
667 }
668 EXTERN_C_END
669 
670 /*  UGLY, ugly, ugly
671    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
672    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
673    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674    converts the entries into single precision and then calls ..._MatScalar() to put them
675    into the single precision data structures.
676 */
677 #if defined(PETSC_USE_MAT_SINGLE)
678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
679 #else
680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681 #endif
682 
683 #define CHUNKSIZE  10
684 
685 /*
686      Checks for missing diagonals
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691 {
692   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
693   PetscErrorCode ierr;
694   PetscInt       *diag,*jj = a->j,i;
695 
696   PetscFunctionBegin;
697   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
698   diag = a->diag;
699   for (i=0; i<a->mbs; i++) {
700     if (jj[diag[i]] != i) {
701       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
702     }
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710 {
711   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
712   PetscErrorCode ierr;
713   PetscInt       i,j,*diag,m = a->mbs;
714 
715   PetscFunctionBegin;
716   if (a->diag) PetscFunctionReturn(0);
717 
718   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
719   PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));
720   for (i=0; i<m; i++) {
721     diag[i] = a->i[i+1];
722     for (j=a->i[i]; j<a->i[i+1]; j++) {
723       if (a->j[j] == i) {
724         diag[i] = j;
725         break;
726       }
727     }
728   }
729   a->diag = diag;
730   PetscFunctionReturn(0);
731 }
732 
733 
734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
739 {
740   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
741   PetscErrorCode ierr;
742   PetscInt       n = a->mbs,i;
743 
744   PetscFunctionBegin;
745   *nn = n;
746   if (!ia) PetscFunctionReturn(0);
747   if (symmetric) {
748     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
749   } else if (oshift == 1) {
750     /* temporarily add 1 to i and j indices */
751     PetscInt nz = a->i[n];
752     for (i=0; i<nz; i++) a->j[i]++;
753     for (i=0; i<n+1; i++) a->i[i]++;
754     *ia = a->i; *ja = a->j;
755   } else {
756     *ia = a->i; *ja = a->j;
757   }
758 
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
765 {
766   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
767   PetscErrorCode ierr;
768   PetscInt       i,n = a->mbs;
769 
770   PetscFunctionBegin;
771   if (!ia) PetscFunctionReturn(0);
772   if (symmetric) {
773     ierr = PetscFree(*ia);CHKERRQ(ierr);
774     ierr = PetscFree(*ja);CHKERRQ(ierr);
775   } else if (oshift == 1) {
776     PetscInt nz = a->i[n]-1;
777     for (i=0; i<nz; i++) a->j[i]--;
778     for (i=0; i<n+1; i++) a->i[i]--;
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatDestroy_SeqBAIJ"
785 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
786 {
787   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791 #if defined(PETSC_USE_LOG)
792   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
793 #endif
794   ierr = PetscFree(a->a);CHKERRQ(ierr);
795   if (!a->singlemalloc) {
796     ierr = PetscFree(a->i);CHKERRQ(ierr);
797     ierr = PetscFree(a->j);CHKERRQ(ierr);
798   }
799   if (a->row) {
800     ierr = ISDestroy(a->row);CHKERRQ(ierr);
801   }
802   if (a->col) {
803     ierr = ISDestroy(a->col);CHKERRQ(ierr);
804   }
805   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
806   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
807   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
808   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
809   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
810   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
811   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
812   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
813 #if defined(PETSC_USE_MAT_SINGLE)
814   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
815 #endif
816   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
817 
818   ierr = PetscFree(a);CHKERRQ(ierr);
819 
820   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
821   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
822   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
823   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
824   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
825   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatSetOption_SeqBAIJ"
831 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
832 {
833   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
834 
835   PetscFunctionBegin;
836   switch (op) {
837   case MAT_ROW_ORIENTED:
838     a->roworiented    = PETSC_TRUE;
839     break;
840   case MAT_COLUMN_ORIENTED:
841     a->roworiented    = PETSC_FALSE;
842     break;
843   case MAT_COLUMNS_SORTED:
844     a->sorted         = PETSC_TRUE;
845     break;
846   case MAT_COLUMNS_UNSORTED:
847     a->sorted         = PETSC_FALSE;
848     break;
849   case MAT_KEEP_ZEROED_ROWS:
850     a->keepzeroedrows = PETSC_TRUE;
851     break;
852   case MAT_NO_NEW_NONZERO_LOCATIONS:
853     a->nonew          = 1;
854     break;
855   case MAT_NEW_NONZERO_LOCATION_ERR:
856     a->nonew          = -1;
857     break;
858   case MAT_NEW_NONZERO_ALLOCATION_ERR:
859     a->nonew          = -2;
860     break;
861   case MAT_YES_NEW_NONZERO_LOCATIONS:
862     a->nonew          = 0;
863     break;
864   case MAT_ROWS_SORTED:
865   case MAT_ROWS_UNSORTED:
866   case MAT_YES_NEW_DIAGONALS:
867   case MAT_IGNORE_OFF_PROC_ENTRIES:
868   case MAT_USE_HASH_TABLE:
869     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
870     break;
871   case MAT_NO_NEW_DIAGONALS:
872     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
873   case MAT_SYMMETRIC:
874   case MAT_STRUCTURALLY_SYMMETRIC:
875   case MAT_NOT_SYMMETRIC:
876   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
877   case MAT_HERMITIAN:
878   case MAT_NOT_HERMITIAN:
879   case MAT_SYMMETRY_ETERNAL:
880   case MAT_NOT_SYMMETRY_ETERNAL:
881     break;
882   default:
883     SETERRQ(PETSC_ERR_SUP,"unknown option");
884   }
885   PetscFunctionReturn(0);
886 }
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "MatGetRow_SeqBAIJ"
890 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
891 {
892   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
893   PetscErrorCode ierr;
894   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
895   MatScalar      *aa,*aa_i;
896   PetscScalar    *v_i;
897 
898   PetscFunctionBegin;
899   bs  = A->bs;
900   ai  = a->i;
901   aj  = a->j;
902   aa  = a->a;
903   bs2 = a->bs2;
904 
905   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
906 
907   bn  = row/bs;   /* Block number */
908   bp  = row % bs; /* Block Position */
909   M   = ai[bn+1] - ai[bn];
910   *nz = bs*M;
911 
912   if (v) {
913     *v = 0;
914     if (*nz) {
915       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
916       for (i=0; i<M; i++) { /* for each block in the block row */
917         v_i  = *v + i*bs;
918         aa_i = aa + bs2*(ai[bn] + i);
919         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
920       }
921     }
922   }
923 
924   if (idx) {
925     *idx = 0;
926     if (*nz) {
927       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
928       for (i=0; i<M; i++) { /* for each block in the block row */
929         idx_i = *idx + i*bs;
930         itmp  = bs*aj[ai[bn] + i];
931         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
932       }
933     }
934   }
935   PetscFunctionReturn(0);
936 }
937 
938 #undef __FUNCT__
939 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
940 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
941 {
942   PetscErrorCode ierr;
943 
944   PetscFunctionBegin;
945   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
946   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatTranspose_SeqBAIJ"
952 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
953 {
954   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
955   Mat            C;
956   PetscErrorCode ierr;
957   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
958   PetscInt       *rows,*cols,bs2=a->bs2;
959   PetscScalar    *array;
960 
961   PetscFunctionBegin;
962   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
963   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
964   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
965 
966 #if defined(PETSC_USE_MAT_SINGLE)
967   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
968   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
969 #else
970   array = a->a;
971 #endif
972 
973   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
974   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
975   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
976   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
977   ierr = PetscFree(col);CHKERRQ(ierr);
978   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
979   cols = rows + bs;
980   for (i=0; i<mbs; i++) {
981     cols[0] = i*bs;
982     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
983     len = ai[i+1] - ai[i];
984     for (j=0; j<len; j++) {
985       rows[0] = (*aj++)*bs;
986       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
987       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
988       array += bs2;
989     }
990   }
991   ierr = PetscFree(rows);CHKERRQ(ierr);
992 #if defined(PETSC_USE_MAT_SINGLE)
993   ierr = PetscFree(array);
994 #endif
995 
996   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
997   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
998 
999   if (B) {
1000     *B = C;
1001   } else {
1002     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1009 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1010 {
1011   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1012   PetscErrorCode ierr;
1013   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1014   int            fd;
1015   PetscScalar    *aa;
1016   FILE           *file;
1017 
1018   PetscFunctionBegin;
1019   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1020   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1021   col_lens[0] = MAT_FILE_COOKIE;
1022 
1023   col_lens[1] = A->m;
1024   col_lens[2] = A->n;
1025   col_lens[3] = a->nz*bs2;
1026 
1027   /* store lengths of each row and write (including header) to file */
1028   count = 0;
1029   for (i=0; i<a->mbs; i++) {
1030     for (j=0; j<bs; j++) {
1031       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1032     }
1033   }
1034   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1035   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1036 
1037   /* store column indices (zero start index) */
1038   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1039   count = 0;
1040   for (i=0; i<a->mbs; i++) {
1041     for (j=0; j<bs; j++) {
1042       for (k=a->i[i]; k<a->i[i+1]; k++) {
1043         for (l=0; l<bs; l++) {
1044           jj[count++] = bs*a->j[k] + l;
1045         }
1046       }
1047     }
1048   }
1049   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1050   ierr = PetscFree(jj);CHKERRQ(ierr);
1051 
1052   /* store nonzero values */
1053   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1054   count = 0;
1055   for (i=0; i<a->mbs; i++) {
1056     for (j=0; j<bs; j++) {
1057       for (k=a->i[i]; k<a->i[i+1]; k++) {
1058         for (l=0; l<bs; l++) {
1059           aa[count++] = a->a[bs2*k + l*bs + j];
1060         }
1061       }
1062     }
1063   }
1064   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1065   ierr = PetscFree(aa);CHKERRQ(ierr);
1066 
1067   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1068   if (file) {
1069     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1070   }
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1076 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1077 {
1078   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1079   PetscErrorCode    ierr;
1080   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1081   PetscViewerFormat format;
1082 
1083   PetscFunctionBegin;
1084   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1085   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1086     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1087   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1088     Mat aij;
1089     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1090     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1091     ierr = MatDestroy(aij);CHKERRQ(ierr);
1092   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093      PetscFunctionReturn(0);
1094   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1095     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1096     for (i=0; i<a->mbs; i++) {
1097       for (j=0; j<bs; j++) {
1098         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1099         for (k=a->i[i]; k<a->i[i+1]; k++) {
1100           for (l=0; l<bs; l++) {
1101 #if defined(PETSC_USE_COMPLEX)
1102             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1103               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1104                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1105             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1106               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1107                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1108             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1109               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1110             }
1111 #else
1112             if (a->a[bs2*k + l*bs + j] != 0.0) {
1113               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1114             }
1115 #endif
1116           }
1117         }
1118         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1119       }
1120     }
1121     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1122   } else {
1123     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1124     for (i=0; i<a->mbs; i++) {
1125       for (j=0; j<bs; j++) {
1126         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1127         for (k=a->i[i]; k<a->i[i+1]; k++) {
1128           for (l=0; l<bs; l++) {
1129 #if defined(PETSC_USE_COMPLEX)
1130             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1131               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1132                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1133             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1134               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1135                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1136             } else {
1137               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1138             }
1139 #else
1140             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1141 #endif
1142           }
1143         }
1144         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1145       }
1146     }
1147     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1148   }
1149   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1155 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1156 {
1157   Mat            A = (Mat) Aa;
1158   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1159   PetscErrorCode ierr;
1160   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1161   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1162   MatScalar      *aa;
1163   PetscViewer    viewer;
1164 
1165   PetscFunctionBegin;
1166 
1167   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1168   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1169 
1170   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1171 
1172   /* loop over matrix elements drawing boxes */
1173   color = PETSC_DRAW_BLUE;
1174   for (i=0,row=0; i<mbs; i++,row+=bs) {
1175     for (j=a->i[i]; j<a->i[i+1]; j++) {
1176       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1177       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1178       aa = a->a + j*bs2;
1179       for (k=0; k<bs; k++) {
1180         for (l=0; l<bs; l++) {
1181           if (PetscRealPart(*aa++) >=  0.) continue;
1182           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1183         }
1184       }
1185     }
1186   }
1187   color = PETSC_DRAW_CYAN;
1188   for (i=0,row=0; i<mbs; i++,row+=bs) {
1189     for (j=a->i[i]; j<a->i[i+1]; j++) {
1190       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1191       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1192       aa = a->a + j*bs2;
1193       for (k=0; k<bs; k++) {
1194         for (l=0; l<bs; l++) {
1195           if (PetscRealPart(*aa++) != 0.) continue;
1196           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1197         }
1198       }
1199     }
1200   }
1201 
1202   color = PETSC_DRAW_RED;
1203   for (i=0,row=0; i<mbs; i++,row+=bs) {
1204     for (j=a->i[i]; j<a->i[i+1]; j++) {
1205       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1206       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1207       aa = a->a + j*bs2;
1208       for (k=0; k<bs; k++) {
1209         for (l=0; l<bs; l++) {
1210           if (PetscRealPart(*aa++) <= 0.) continue;
1211           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1212         }
1213       }
1214     }
1215   }
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1221 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1222 {
1223   PetscErrorCode ierr;
1224   PetscReal      xl,yl,xr,yr,w,h;
1225   PetscDraw      draw;
1226   PetscTruth     isnull;
1227 
1228   PetscFunctionBegin;
1229 
1230   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1231   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1232 
1233   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1234   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1235   xr += w;    yr += h;  xl = -w;     yl = -h;
1236   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1237   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1238   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatView_SeqBAIJ"
1244 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1245 {
1246   PetscErrorCode ierr;
1247   PetscTruth     iascii,isbinary,isdraw;
1248 
1249   PetscFunctionBegin;
1250   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1251   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1252   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1253   if (iascii){
1254     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1255   } else if (isbinary) {
1256     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1257   } else if (isdraw) {
1258     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1259   } else {
1260     Mat B;
1261     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1262     ierr = MatView(B,viewer);CHKERRQ(ierr);
1263     ierr = MatDestroy(B);CHKERRQ(ierr);
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1271 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1272 {
1273   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1274   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1275   PetscInt    *ai = a->i,*ailen = a->ilen;
1276   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1277   MatScalar   *ap,*aa = a->a,zero = 0.0;
1278 
1279   PetscFunctionBegin;
1280   for (k=0; k<m; k++) { /* loop over rows */
1281     row  = im[k]; brow = row/bs;
1282     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1283     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1284     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1285     nrow = ailen[brow];
1286     for (l=0; l<n; l++) { /* loop over columns */
1287       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1288       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1289       col  = in[l] ;
1290       bcol = col/bs;
1291       cidx = col%bs;
1292       ridx = row%bs;
1293       high = nrow;
1294       low  = 0; /* assume unsorted */
1295       while (high-low > 5) {
1296         t = (low+high)/2;
1297         if (rp[t] > bcol) high = t;
1298         else             low  = t;
1299       }
1300       for (i=low; i<high; i++) {
1301         if (rp[i] > bcol) break;
1302         if (rp[i] == bcol) {
1303           *v++ = ap[bs2*i+bs*cidx+ridx];
1304           goto finished;
1305         }
1306       }
1307       *v++ = zero;
1308       finished:;
1309     }
1310   }
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #if defined(PETSC_USE_MAT_SINGLE)
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1317 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1318 {
1319   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1320   PetscErrorCode ierr;
1321   PetscInt       i,N = m*n*b->bs2;
1322   MatScalar      *vsingle;
1323 
1324   PetscFunctionBegin;
1325   if (N > b->setvalueslen) {
1326     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1327     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1328     b->setvalueslen  = N;
1329   }
1330   vsingle = b->setvaluescopy;
1331   for (i=0; i<N; i++) {
1332     vsingle[i] = v[i];
1333   }
1334   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 #endif
1338 
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1342 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1343 {
1344   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1345   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1346   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1347   PetscErrorCode  ierr;
1348   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1349   PetscTruth      roworiented=a->roworiented;
1350   const MatScalar *value = v;
1351   MatScalar       *ap,*aa = a->a,*bap;
1352 
1353   PetscFunctionBegin;
1354   if (roworiented) {
1355     stepval = (n-1)*bs;
1356   } else {
1357     stepval = (m-1)*bs;
1358   }
1359   for (k=0; k<m; k++) { /* loop over added rows */
1360     row  = im[k];
1361     if (row < 0) continue;
1362 #if defined(PETSC_USE_BOPT_g)
1363     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1364 #endif
1365     rp   = aj + ai[row];
1366     ap   = aa + bs2*ai[row];
1367     rmax = imax[row];
1368     nrow = ailen[row];
1369     low  = 0;
1370     for (l=0; l<n; l++) { /* loop over added columns */
1371       if (in[l] < 0) continue;
1372 #if defined(PETSC_USE_BOPT_g)
1373       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1374 #endif
1375       col = in[l];
1376       if (roworiented) {
1377         value = v + k*(stepval+bs)*bs + l*bs;
1378       } else {
1379         value = v + l*(stepval+bs)*bs + k*bs;
1380       }
1381       if (!sorted) low = 0; high = nrow;
1382       while (high-low > 7) {
1383         t = (low+high)/2;
1384         if (rp[t] > col) high = t;
1385         else             low  = t;
1386       }
1387       for (i=low; i<high; i++) {
1388         if (rp[i] > col) break;
1389         if (rp[i] == col) {
1390           bap  = ap +  bs2*i;
1391           if (roworiented) {
1392             if (is == ADD_VALUES) {
1393               for (ii=0; ii<bs; ii++,value+=stepval) {
1394                 for (jj=ii; jj<bs2; jj+=bs) {
1395                   bap[jj] += *value++;
1396                 }
1397               }
1398             } else {
1399               for (ii=0; ii<bs; ii++,value+=stepval) {
1400                 for (jj=ii; jj<bs2; jj+=bs) {
1401                   bap[jj] = *value++;
1402                 }
1403               }
1404             }
1405           } else {
1406             if (is == ADD_VALUES) {
1407               for (ii=0; ii<bs; ii++,value+=stepval) {
1408                 for (jj=0; jj<bs; jj++) {
1409                   *bap++ += *value++;
1410                 }
1411               }
1412             } else {
1413               for (ii=0; ii<bs; ii++,value+=stepval) {
1414                 for (jj=0; jj<bs; jj++) {
1415                   *bap++  = *value++;
1416                 }
1417               }
1418             }
1419           }
1420           goto noinsert2;
1421         }
1422       }
1423       if (nonew == 1) goto noinsert2;
1424       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1425       if (nrow >= rmax) {
1426         /* there is no extra room in row, therefore enlarge */
1427         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1428         MatScalar *new_a;
1429 
1430         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1431 
1432         /* malloc new storage space */
1433         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1434 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1435         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1436         new_i   = new_j + new_nz;
1437 
1438         /* copy over old data into new slots */
1439         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1440         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1441         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1442         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1443         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1444         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1445         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1446         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1447         /* free up old matrix storage */
1448         ierr = PetscFree(a->a);CHKERRQ(ierr);
1449         if (!a->singlemalloc) {
1450           ierr = PetscFree(a->i);CHKERRQ(ierr);
1451           ierr = PetscFree(a->j);CHKERRQ(ierr);
1452         }
1453         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1454         a->singlemalloc = PETSC_TRUE;
1455 
1456         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1457         rmax = imax[row] = imax[row] + CHUNKSIZE;
1458         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1459         a->maxnz += bs2*CHUNKSIZE;
1460         a->reallocs++;
1461         a->nz++;
1462       }
1463       N = nrow++ - 1;
1464       /* shift up all the later entries in this row */
1465       for (ii=N; ii>=i; ii--) {
1466         rp[ii+1] = rp[ii];
1467         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1468       }
1469       if (N >= i) {
1470         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1471       }
1472       rp[i] = col;
1473       bap   = ap +  bs2*i;
1474       if (roworiented) {
1475         for (ii=0; ii<bs; ii++,value+=stepval) {
1476           for (jj=ii; jj<bs2; jj+=bs) {
1477             bap[jj] = *value++;
1478           }
1479         }
1480       } else {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=0; jj<bs; jj++) {
1483             *bap++  = *value++;
1484           }
1485         }
1486       }
1487       noinsert2:;
1488       low = i;
1489     }
1490     ailen[row] = nrow;
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1497 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498 {
1499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1500   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1502   PetscErrorCode ierr;
1503   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504   MatScalar      *aa = a->a,*ap;
1505 
1506   PetscFunctionBegin;
1507   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1508 
1509   if (m) rmax = ailen[0];
1510   for (i=1; i<mbs; i++) {
1511     /* move each row back by the amount of empty slots (fshift) before it*/
1512     fshift += imax[i-1] - ailen[i-1];
1513     rmax   = PetscMax(rmax,ailen[i]);
1514     if (fshift) {
1515       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1516       N = ailen[i];
1517       for (j=0; j<N; j++) {
1518         ip[j-fshift] = ip[j];
1519         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1520       }
1521     }
1522     ai[i] = ai[i-1] + ailen[i-1];
1523   }
1524   if (mbs) {
1525     fshift += imax[mbs-1] - ailen[mbs-1];
1526     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1527   }
1528   /* reset ilen and imax for each row */
1529   for (i=0; i<mbs; i++) {
1530     ailen[i] = imax[i] = ai[i+1] - ai[i];
1531   }
1532   a->nz = ai[mbs];
1533 
1534   /* diagonals may have moved, so kill the diagonal pointers */
1535   a->idiagvalid = PETSC_FALSE;
1536   if (fshift && a->diag) {
1537     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1538     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1539     a->diag = 0;
1540   }
1541   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2);
1542   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs);
1543   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
1544   a->reallocs          = 0;
1545   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1546 
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 
1551 
1552 /*
1553    This function returns an array of flags which indicate the locations of contiguous
1554    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1555    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1556    Assume: sizes should be long enough to hold all the values.
1557 */
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1560 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1561 {
1562   PetscInt   i,j,k,row;
1563   PetscTruth flg;
1564 
1565   PetscFunctionBegin;
1566   for (i=0,j=0; i<n; j++) {
1567     row = idx[i];
1568     if (row%bs!=0) { /* Not the begining of a block */
1569       sizes[j] = 1;
1570       i++;
1571     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1572       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1573       i++;
1574     } else { /* Begining of the block, so check if the complete block exists */
1575       flg = PETSC_TRUE;
1576       for (k=1; k<bs; k++) {
1577         if (row+k != idx[i+k]) { /* break in the block */
1578           flg = PETSC_FALSE;
1579           break;
1580         }
1581       }
1582       if (flg == PETSC_TRUE) { /* No break in the bs */
1583         sizes[j] = bs;
1584         i+= bs;
1585       } else {
1586         sizes[j] = 1;
1587         i++;
1588       }
1589     }
1590   }
1591   *bs_max = j;
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1597 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1598 {
1599   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1600   PetscErrorCode ierr;
1601   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1602   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1603   PetscScalar    zero = 0.0;
1604   MatScalar      *aa;
1605 
1606   PetscFunctionBegin;
1607   /* Make a copy of the IS and  sort it */
1608   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1609   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1610 
1611   /* allocate memory for rows,sizes */
1612   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1613   sizes = rows + is_n;
1614 
1615   /* copy IS values to rows, and sort them */
1616   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1617   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1618   if (baij->keepzeroedrows) {
1619     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1620     bs_max = is_n;
1621   } else {
1622     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1623   }
1624   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1625 
1626   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1627     row   = rows[j];
1628     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1629     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1630     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1631     if (sizes[i] == bs && !baij->keepzeroedrows) {
1632       if (diag) {
1633         if (baij->ilen[row/bs] > 0) {
1634           baij->ilen[row/bs]       = 1;
1635           baij->j[baij->i[row/bs]] = row/bs;
1636           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1637         }
1638         /* Now insert all the diagonal values for this bs */
1639         for (k=0; k<bs; k++) {
1640           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1641         }
1642       } else { /* (!diag) */
1643         baij->ilen[row/bs] = 0;
1644       } /* end (!diag) */
1645     } else { /* (sizes[i] != bs) */
1646 #if defined (PETSC_USE_DEBUG)
1647       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1648 #endif
1649       for (k=0; k<count; k++) {
1650         aa[0] =  zero;
1651         aa    += bs;
1652       }
1653       if (diag) {
1654         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1655       }
1656     }
1657   }
1658 
1659   ierr = PetscFree(rows);CHKERRQ(ierr);
1660   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1666 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1667 {
1668   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1669   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1670   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1671   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1672   PetscErrorCode ierr;
1673   PetscInt       ridx,cidx,bs2=a->bs2;
1674   PetscTruth     roworiented=a->roworiented;
1675   MatScalar      *ap,value,*aa=a->a,*bap;
1676 
1677   PetscFunctionBegin;
1678   for (k=0; k<m; k++) { /* loop over added rows */
1679     row  = im[k]; brow = row/bs;
1680     if (row < 0) continue;
1681 #if defined(PETSC_USE_BOPT_g)
1682     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1683 #endif
1684     rp   = aj + ai[brow];
1685     ap   = aa + bs2*ai[brow];
1686     rmax = imax[brow];
1687     nrow = ailen[brow];
1688     low  = 0;
1689     for (l=0; l<n; l++) { /* loop over added columns */
1690       if (in[l] < 0) continue;
1691 #if defined(PETSC_USE_BOPT_g)
1692       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1693 #endif
1694       col = in[l]; bcol = col/bs;
1695       ridx = row % bs; cidx = col % bs;
1696       if (roworiented) {
1697         value = v[l + k*n];
1698       } else {
1699         value = v[k + l*m];
1700       }
1701       if (!sorted) low = 0; high = nrow;
1702       while (high-low > 7) {
1703         t = (low+high)/2;
1704         if (rp[t] > bcol) high = t;
1705         else              low  = t;
1706       }
1707       for (i=low; i<high; i++) {
1708         if (rp[i] > bcol) break;
1709         if (rp[i] == bcol) {
1710           bap  = ap +  bs2*i + bs*cidx + ridx;
1711           if (is == ADD_VALUES) *bap += value;
1712           else                  *bap  = value;
1713           goto noinsert1;
1714         }
1715       }
1716       if (nonew == 1) goto noinsert1;
1717       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1718       if (nrow >= rmax) {
1719         /* there is no extra room in row, therefore enlarge */
1720         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1721         MatScalar *new_a;
1722 
1723         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1724 
1725         /* Malloc new storage space */
1726         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1727 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1728         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1729         new_i   = new_j + new_nz;
1730 
1731         /* copy over old data into new slots */
1732         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1733         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1734         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1735         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1736         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1737         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1738         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1739         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1740         /* free up old matrix storage */
1741         ierr = PetscFree(a->a);CHKERRQ(ierr);
1742         if (!a->singlemalloc) {
1743           ierr = PetscFree(a->i);CHKERRQ(ierr);
1744           ierr = PetscFree(a->j);CHKERRQ(ierr);
1745         }
1746         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1747         a->singlemalloc = PETSC_TRUE;
1748 
1749         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1750         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1751         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1752         a->maxnz += bs2*CHUNKSIZE;
1753         a->reallocs++;
1754         a->nz++;
1755       }
1756       N = nrow++ - 1;
1757       /* shift up all the later entries in this row */
1758       for (ii=N; ii>=i; ii--) {
1759         rp[ii+1] = rp[ii];
1760         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1761       }
1762       if (N>=i) {
1763         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1764       }
1765       rp[i]                      = bcol;
1766       ap[bs2*i + bs*cidx + ridx] = value;
1767       noinsert1:;
1768       low = i;
1769     }
1770     ailen[brow] = nrow;
1771   }
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1778 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1779 {
1780   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1781   Mat            outA;
1782   PetscErrorCode ierr;
1783   PetscTruth     row_identity,col_identity;
1784 
1785   PetscFunctionBegin;
1786   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1787   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1788   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1789   if (!row_identity || !col_identity) {
1790     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1791   }
1792 
1793   outA          = inA;
1794   inA->factor   = FACTOR_LU;
1795 
1796   if (!a->diag) {
1797     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1798   }
1799 
1800   a->row        = row;
1801   a->col        = col;
1802   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1803   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1804 
1805   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1806   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1807   PetscLogObjectParent(inA,a->icol);
1808 
1809   /*
1810       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1811       for ILU(0) factorization with natural ordering
1812   */
1813   if (inA->bs < 8) {
1814     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1815   } else {
1816     if (!a->solve_work) {
1817       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1818       PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));
1819     }
1820   }
1821 
1822   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1823 
1824   PetscFunctionReturn(0);
1825 }
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1828 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1829 {
1830   static PetscTruth called = PETSC_FALSE;
1831   MPI_Comm          comm = A->comm;
1832   PetscErrorCode    ierr;
1833 
1834   PetscFunctionBegin;
1835   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1836   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1837   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 EXTERN_C_BEGIN
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1844 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1845 {
1846   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1847   PetscInt    i,nz,nbs;
1848 
1849   PetscFunctionBegin;
1850   nz  = baij->maxnz/baij->bs2;
1851   nbs = baij->nbs;
1852   for (i=0; i<nz; i++) {
1853     baij->j[i] = indices[i];
1854   }
1855   baij->nz = nz;
1856   for (i=0; i<nbs; i++) {
1857     baij->ilen[i] = baij->imax[i];
1858   }
1859 
1860   PetscFunctionReturn(0);
1861 }
1862 EXTERN_C_END
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1866 /*@
1867     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1868        in the matrix.
1869 
1870   Input Parameters:
1871 +  mat - the SeqBAIJ matrix
1872 -  indices - the column indices
1873 
1874   Level: advanced
1875 
1876   Notes:
1877     This can be called if you have precomputed the nonzero structure of the
1878   matrix and want to provide it to the matrix object to improve the performance
1879   of the MatSetValues() operation.
1880 
1881     You MUST have set the correct numbers of nonzeros per row in the call to
1882   MatCreateSeqBAIJ().
1883 
1884     MUST be called before any calls to MatSetValues();
1885 
1886 @*/
1887 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1888 {
1889   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1890 
1891   PetscFunctionBegin;
1892   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1893   PetscValidPointer(indices,2);
1894   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1895   if (f) {
1896     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1897   } else {
1898     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1905 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1906 {
1907   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1910   PetscReal      atmp;
1911   PetscScalar    *x,zero = 0.0;
1912   MatScalar      *aa;
1913   PetscInt       ncols,brow,krow,kcol;
1914 
1915   PetscFunctionBegin;
1916   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1917   bs   = A->bs;
1918   aa   = a->a;
1919   ai   = a->i;
1920   aj   = a->j;
1921   mbs = a->mbs;
1922 
1923   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1924   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1925   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1926   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1927   for (i=0; i<mbs; i++) {
1928     ncols = ai[1] - ai[0]; ai++;
1929     brow  = bs*i;
1930     for (j=0; j<ncols; j++){
1931       /* bcol = bs*(*aj); */
1932       for (kcol=0; kcol<bs; kcol++){
1933         for (krow=0; krow<bs; krow++){
1934           atmp = PetscAbsScalar(*aa); aa++;
1935           row = brow + krow;    /* row index */
1936           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1937           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1938         }
1939       }
1940       aj++;
1941     }
1942   }
1943   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1949 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1950 {
1951   PetscErrorCode ierr;
1952 
1953   PetscFunctionBegin;
1954   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1960 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1961 {
1962   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1963   PetscFunctionBegin;
1964   *array = a->a;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1970 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1971 {
1972   PetscFunctionBegin;
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 #include "petscblaslapack.h"
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1979 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1980 {
1981   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1982   PetscErrorCode ierr;
1983   PetscInt       i,bs=Y->bs,j,bs2;
1984   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1985 
1986   PetscFunctionBegin;
1987   if (str == SAME_NONZERO_PATTERN) {
1988     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1989   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1990     if (y->xtoy && y->XtoY != X) {
1991       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1992       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1993     }
1994     if (!y->xtoy) { /* get xtoy */
1995       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1996       y->XtoY = X;
1997     }
1998     bs2 = bs*bs;
1999     for (i=0; i<x->nz; i++) {
2000       j = 0;
2001       while (j < bs2){
2002         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2003         j++;
2004       }
2005     }
2006     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2007   } else {
2008     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 /* -------------------------------------------------------------------*/
2014 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2015        MatGetRow_SeqBAIJ,
2016        MatRestoreRow_SeqBAIJ,
2017        MatMult_SeqBAIJ_N,
2018 /* 4*/ MatMultAdd_SeqBAIJ_N,
2019        MatMultTranspose_SeqBAIJ,
2020        MatMultTransposeAdd_SeqBAIJ,
2021        MatSolve_SeqBAIJ_N,
2022        0,
2023        0,
2024 /*10*/ 0,
2025        MatLUFactor_SeqBAIJ,
2026        0,
2027        0,
2028        MatTranspose_SeqBAIJ,
2029 /*15*/ MatGetInfo_SeqBAIJ,
2030        MatEqual_SeqBAIJ,
2031        MatGetDiagonal_SeqBAIJ,
2032        MatDiagonalScale_SeqBAIJ,
2033        MatNorm_SeqBAIJ,
2034 /*20*/ 0,
2035        MatAssemblyEnd_SeqBAIJ,
2036        0,
2037        MatSetOption_SeqBAIJ,
2038        MatZeroEntries_SeqBAIJ,
2039 /*25*/ MatZeroRows_SeqBAIJ,
2040        MatLUFactorSymbolic_SeqBAIJ,
2041        MatLUFactorNumeric_SeqBAIJ_N,
2042        0,
2043        0,
2044 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2045        MatILUFactorSymbolic_SeqBAIJ,
2046        0,
2047        MatGetArray_SeqBAIJ,
2048        MatRestoreArray_SeqBAIJ,
2049 /*35*/ MatDuplicate_SeqBAIJ,
2050        0,
2051        0,
2052        MatILUFactor_SeqBAIJ,
2053        0,
2054 /*40*/ MatAXPY_SeqBAIJ,
2055        MatGetSubMatrices_SeqBAIJ,
2056        MatIncreaseOverlap_SeqBAIJ,
2057        MatGetValues_SeqBAIJ,
2058        0,
2059 /*45*/ MatPrintHelp_SeqBAIJ,
2060        MatScale_SeqBAIJ,
2061        0,
2062        0,
2063        0,
2064 /*50*/ 0,
2065        MatGetRowIJ_SeqBAIJ,
2066        MatRestoreRowIJ_SeqBAIJ,
2067        0,
2068        0,
2069 /*55*/ 0,
2070        0,
2071        0,
2072        0,
2073        MatSetValuesBlocked_SeqBAIJ,
2074 /*60*/ MatGetSubMatrix_SeqBAIJ,
2075        MatDestroy_SeqBAIJ,
2076        MatView_SeqBAIJ,
2077        MatGetPetscMaps_Petsc,
2078        0,
2079 /*65*/ 0,
2080        0,
2081        0,
2082        0,
2083        0,
2084 /*70*/ MatGetRowMax_SeqBAIJ,
2085        MatConvert_Basic,
2086        0,
2087        0,
2088        0,
2089 /*75*/ 0,
2090        0,
2091        0,
2092        0,
2093        0,
2094 /*80*/ 0,
2095        0,
2096        0,
2097        0,
2098        MatLoad_SeqBAIJ,
2099 /*85*/ 0,
2100        0,
2101        0,
2102        0,
2103        0,
2104 /*90*/ 0,
2105        0,
2106        0,
2107        0,
2108        0,
2109 /*95*/ 0,
2110        0,
2111        0,
2112        0};
2113 
2114 EXTERN_C_BEGIN
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2117 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2118 {
2119   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2120   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   if (aij->nonew != 1) {
2125     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2126   }
2127 
2128   /* allocate space for values if not already there */
2129   if (!aij->saved_values) {
2130     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2131   }
2132 
2133   /* copy values over */
2134   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 EXTERN_C_END
2138 
2139 EXTERN_C_BEGIN
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2142 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2143 {
2144   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2145   PetscErrorCode ierr;
2146   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2147 
2148   PetscFunctionBegin;
2149   if (aij->nonew != 1) {
2150     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2151   }
2152   if (!aij->saved_values) {
2153     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2154   }
2155 
2156   /* copy values over */
2157   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 EXTERN_C_END
2161 
2162 EXTERN_C_BEGIN
2163 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2164 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2165 EXTERN_C_END
2166 
2167 EXTERN_C_BEGIN
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2170 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2171 {
2172   Mat_SeqBAIJ    *b;
2173   PetscErrorCode ierr;
2174   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2175   PetscTruth     flg;
2176 
2177   PetscFunctionBegin;
2178 
2179   B->preallocated = PETSC_TRUE;
2180   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2181   if (nnz && newbs != bs) {
2182     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2183   }
2184   bs   = newbs;
2185 
2186   mbs  = B->m/bs;
2187   nbs  = B->n/bs;
2188   bs2  = bs*bs;
2189 
2190   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2191     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2192   }
2193 
2194   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2195   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2196   if (nnz) {
2197     for (i=0; i<mbs; i++) {
2198       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2199       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2200     }
2201   }
2202 
2203   b       = (Mat_SeqBAIJ*)B->data;
2204   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2205   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2206   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2207   if (!flg) {
2208     switch (bs) {
2209     case 1:
2210       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2211       B->ops->mult            = MatMult_SeqBAIJ_1;
2212       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2213       break;
2214     case 2:
2215       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2216       B->ops->mult            = MatMult_SeqBAIJ_2;
2217       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2218       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2219       break;
2220     case 3:
2221       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2222       B->ops->mult            = MatMult_SeqBAIJ_3;
2223       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2224       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2225       break;
2226     case 4:
2227       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2228       B->ops->mult            = MatMult_SeqBAIJ_4;
2229       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2230       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2231       break;
2232     case 5:
2233       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2234       B->ops->mult            = MatMult_SeqBAIJ_5;
2235       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2236       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2237       break;
2238     case 6:
2239       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2240       B->ops->mult            = MatMult_SeqBAIJ_6;
2241       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2242       break;
2243     case 7:
2244       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2245       B->ops->mult            = MatMult_SeqBAIJ_7;
2246       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2247       break;
2248     default:
2249       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2250       B->ops->mult            = MatMult_SeqBAIJ_N;
2251       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2252       break;
2253     }
2254   }
2255   B->bs      = bs;
2256   b->mbs     = mbs;
2257   b->nbs     = nbs;
2258   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2259   if (!nnz) {
2260     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2261     else if (nz <= 0)        nz = 1;
2262     for (i=0; i<mbs; i++) b->imax[i] = nz;
2263     nz = nz*mbs;
2264   } else {
2265     nz = 0;
2266     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2267   }
2268 
2269   /* allocate the matrix space */
2270   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2271   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2272   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2273   b->j  = (PetscInt*)(b->a + nz*bs2);
2274   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2275   b->i  = b->j + nz;
2276   b->singlemalloc = PETSC_TRUE;
2277 
2278   b->i[0] = 0;
2279   for (i=1; i<mbs+1; i++) {
2280     b->i[i] = b->i[i-1] + b->imax[i-1];
2281   }
2282 
2283   /* b->ilen will count nonzeros in each block row so far. */
2284   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2285   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2286   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2287 
2288   B->bs               = bs;
2289   b->bs2              = bs2;
2290   b->mbs              = mbs;
2291   b->nz               = 0;
2292   b->maxnz            = nz*bs2;
2293   B->info.nz_unneeded = (PetscReal)b->maxnz;
2294   PetscFunctionReturn(0);
2295 }
2296 EXTERN_C_END
2297 
2298 /*MC
2299    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2300    block sparse compressed row format.
2301 
2302    Options Database Keys:
2303 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2304 
2305   Level: beginner
2306 
2307 .seealso: MatCreateSeqBAIJ
2308 M*/
2309 
2310 EXTERN_C_BEGIN
2311 #undef __FUNCT__
2312 #define __FUNCT__ "MatCreate_SeqBAIJ"
2313 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2314 {
2315   PetscErrorCode ierr;
2316   PetscMPIInt    size;
2317   Mat_SeqBAIJ    *b;
2318 
2319   PetscFunctionBegin;
2320   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2321   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2322 
2323   B->m = B->M = PetscMax(B->m,B->M);
2324   B->n = B->N = PetscMax(B->n,B->N);
2325   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2326   B->data = (void*)b;
2327   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2328   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2329   B->factor           = 0;
2330   B->lupivotthreshold = 1.0;
2331   B->mapping          = 0;
2332   b->row              = 0;
2333   b->col              = 0;
2334   b->icol             = 0;
2335   b->reallocs         = 0;
2336   b->saved_values     = 0;
2337 #if defined(PETSC_USE_MAT_SINGLE)
2338   b->setvalueslen     = 0;
2339   b->setvaluescopy    = PETSC_NULL;
2340 #endif
2341 
2342   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2343   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2344 
2345   b->sorted           = PETSC_FALSE;
2346   b->roworiented      = PETSC_TRUE;
2347   b->nonew            = 0;
2348   b->diag             = 0;
2349   b->solve_work       = 0;
2350   b->mult_work        = 0;
2351   B->spptr            = 0;
2352   B->info.nz_unneeded = (PetscReal)b->maxnz;
2353   b->keepzeroedrows   = PETSC_FALSE;
2354   b->xtoy              = 0;
2355   b->XtoY              = 0;
2356 
2357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2358                                      "MatStoreValues_SeqBAIJ",
2359                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2361                                      "MatRetrieveValues_SeqBAIJ",
2362                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2364                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2365                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2367                                      "MatConvert_SeqBAIJ_SeqAIJ",
2368                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2369 #if !defined(PETSC_USE_64BIT_INT)
2370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2371                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2372                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2373 #endif
2374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2375                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2376                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 EXTERN_C_END
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2383 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2384 {
2385   Mat            C;
2386   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2387   PetscErrorCode ierr;
2388   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2389 
2390   PetscFunctionBegin;
2391   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2392 
2393   *B = 0;
2394   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2395   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2396   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2397   c    = (Mat_SeqBAIJ*)C->data;
2398 
2399   C->M   = A->M;
2400   C->N   = A->N;
2401   C->bs  = A->bs;
2402   c->bs2 = a->bs2;
2403   c->mbs = a->mbs;
2404   c->nbs = a->nbs;
2405 
2406   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2407   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2408   for (i=0; i<mbs; i++) {
2409     c->imax[i] = a->imax[i];
2410     c->ilen[i] = a->ilen[i];
2411   }
2412 
2413   /* allocate the matrix space */
2414   c->singlemalloc = PETSC_TRUE;
2415   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2416   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2417   c->j = (PetscInt*)(c->a + nz*bs2);
2418   c->i = c->j + nz;
2419   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2420   if (mbs > 0) {
2421     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2422     if (cpvalues == MAT_COPY_VALUES) {
2423       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2424     } else {
2425       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2426     }
2427   }
2428 
2429   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2430   c->sorted      = a->sorted;
2431   c->roworiented = a->roworiented;
2432   c->nonew       = a->nonew;
2433 
2434   if (a->diag) {
2435     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2436     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2437     for (i=0; i<mbs; i++) {
2438       c->diag[i] = a->diag[i];
2439     }
2440   } else c->diag        = 0;
2441   c->nz                 = a->nz;
2442   c->maxnz              = a->maxnz;
2443   c->solve_work         = 0;
2444   c->mult_work          = 0;
2445   C->preallocated       = PETSC_TRUE;
2446   C->assembled          = PETSC_TRUE;
2447   *B = C;
2448   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatLoad_SeqBAIJ"
2454 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2455 {
2456   Mat_SeqBAIJ    *a;
2457   Mat            B;
2458   PetscErrorCode ierr;
2459   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2460   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2461   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2462   PetscInt       *masked,nmask,tmp,bs2,ishift;
2463   PetscMPIInt    size;
2464   int            fd;
2465   PetscScalar    *aa;
2466   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2467 
2468   PetscFunctionBegin;
2469   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2470   bs2  = bs*bs;
2471 
2472   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2473   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2474   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2475   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2476   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2477   M = header[1]; N = header[2]; nz = header[3];
2478 
2479   if (header[3] < 0) {
2480     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2481   }
2482 
2483   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2484 
2485   /*
2486      This code adds extra rows to make sure the number of rows is
2487     divisible by the blocksize
2488   */
2489   mbs        = M/bs;
2490   extra_rows = bs - M + bs*(mbs);
2491   if (extra_rows == bs) extra_rows = 0;
2492   else                  mbs++;
2493   if (extra_rows) {
2494     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2495   }
2496 
2497   /* read in row lengths */
2498   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2499   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2500   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2501 
2502   /* read in column indices */
2503   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2504   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2505   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2506 
2507   /* loop over row lengths determining block row lengths */
2508   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2509   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2510   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2511   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2512   masked   = mask + mbs;
2513   rowcount = 0; nzcount = 0;
2514   for (i=0; i<mbs; i++) {
2515     nmask = 0;
2516     for (j=0; j<bs; j++) {
2517       kmax = rowlengths[rowcount];
2518       for (k=0; k<kmax; k++) {
2519         tmp = jj[nzcount++]/bs;
2520         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2521       }
2522       rowcount++;
2523     }
2524     browlengths[i] += nmask;
2525     /* zero out the mask elements we set */
2526     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2527   }
2528 
2529   /* create our matrix */
2530   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2531   ierr = MatSetType(B,type);CHKERRQ(ierr);
2532   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2533   a = (Mat_SeqBAIJ*)B->data;
2534 
2535   /* set matrix "i" values */
2536   a->i[0] = 0;
2537   for (i=1; i<= mbs; i++) {
2538     a->i[i]      = a->i[i-1] + browlengths[i-1];
2539     a->ilen[i-1] = browlengths[i-1];
2540   }
2541   a->nz         = 0;
2542   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2543 
2544   /* read in nonzero values */
2545   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2546   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2547   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2548 
2549   /* set "a" and "j" values into matrix */
2550   nzcount = 0; jcount = 0;
2551   for (i=0; i<mbs; i++) {
2552     nzcountb = nzcount;
2553     nmask    = 0;
2554     for (j=0; j<bs; j++) {
2555       kmax = rowlengths[i*bs+j];
2556       for (k=0; k<kmax; k++) {
2557         tmp = jj[nzcount++]/bs;
2558 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2559       }
2560     }
2561     /* sort the masked values */
2562     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2563 
2564     /* set "j" values into matrix */
2565     maskcount = 1;
2566     for (j=0; j<nmask; j++) {
2567       a->j[jcount++]  = masked[j];
2568       mask[masked[j]] = maskcount++;
2569     }
2570     /* set "a" values into matrix */
2571     ishift = bs2*a->i[i];
2572     for (j=0; j<bs; j++) {
2573       kmax = rowlengths[i*bs+j];
2574       for (k=0; k<kmax; k++) {
2575         tmp       = jj[nzcountb]/bs ;
2576         block     = mask[tmp] - 1;
2577         point     = jj[nzcountb] - bs*tmp;
2578         idx       = ishift + bs2*block + j + bs*point;
2579         a->a[idx] = (MatScalar)aa[nzcountb++];
2580       }
2581     }
2582     /* zero out the mask elements we set */
2583     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2584   }
2585   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2586 
2587   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2588   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2589   ierr = PetscFree(aa);CHKERRQ(ierr);
2590   ierr = PetscFree(jj);CHKERRQ(ierr);
2591   ierr = PetscFree(mask);CHKERRQ(ierr);
2592 
2593   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2594   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2595   ierr = MatView_Private(B);CHKERRQ(ierr);
2596 
2597   *A = B;
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatCreateSeqBAIJ"
2603 /*@C
2604    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2605    compressed row) format.  For good matrix assembly performance the
2606    user should preallocate the matrix storage by setting the parameter nz
2607    (or the array nnz).  By setting these parameters accurately, performance
2608    during matrix assembly can be increased by more than a factor of 50.
2609 
2610    Collective on MPI_Comm
2611 
2612    Input Parameters:
2613 +  comm - MPI communicator, set to PETSC_COMM_SELF
2614 .  bs - size of block
2615 .  m - number of rows
2616 .  n - number of columns
2617 .  nz - number of nonzero blocks  per block row (same for all rows)
2618 -  nnz - array containing the number of nonzero blocks in the various block rows
2619          (possibly different for each block row) or PETSC_NULL
2620 
2621    Output Parameter:
2622 .  A - the matrix
2623 
2624    Options Database Keys:
2625 .   -mat_no_unroll - uses code that does not unroll the loops in the
2626                      block calculations (much slower)
2627 .    -mat_block_size - size of the blocks to use
2628 
2629    Level: intermediate
2630 
2631    Notes:
2632    If the nnz parameter is given then the nz parameter is ignored
2633 
2634    A nonzero block is any block that as 1 or more nonzeros in it
2635 
2636    The block AIJ format is fully compatible with standard Fortran 77
2637    storage.  That is, the stored row and column indices can begin at
2638    either one (as in Fortran) or zero.  See the users' manual for details.
2639 
2640    Specify the preallocated storage with either nz or nnz (not both).
2641    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2642    allocation.  For additional details, see the users manual chapter on
2643    matrices.
2644 
2645 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2646 @*/
2647 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2648 {
2649   PetscErrorCode ierr;
2650 
2651   PetscFunctionBegin;
2652   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2653   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2654   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 #undef __FUNCT__
2659 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2660 /*@C
2661    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2662    per row in the matrix. For good matrix assembly performance the
2663    user should preallocate the matrix storage by setting the parameter nz
2664    (or the array nnz).  By setting these parameters accurately, performance
2665    during matrix assembly can be increased by more than a factor of 50.
2666 
2667    Collective on MPI_Comm
2668 
2669    Input Parameters:
2670 +  A - the matrix
2671 .  bs - size of block
2672 .  nz - number of block nonzeros per block row (same for all rows)
2673 -  nnz - array containing the number of block nonzeros in the various block rows
2674          (possibly different for each block row) or PETSC_NULL
2675 
2676    Options Database Keys:
2677 .   -mat_no_unroll - uses code that does not unroll the loops in the
2678                      block calculations (much slower)
2679 .    -mat_block_size - size of the blocks to use
2680 
2681    Level: intermediate
2682 
2683    Notes:
2684    If the nnz parameter is given then the nz parameter is ignored
2685 
2686    The block AIJ format is fully compatible with standard Fortran 77
2687    storage.  That is, the stored row and column indices can begin at
2688    either one (as in Fortran) or zero.  See the users' manual for details.
2689 
2690    Specify the preallocated storage with either nz or nnz (not both).
2691    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2692    allocation.  For additional details, see the users manual chapter on
2693    matrices.
2694 
2695 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2696 @*/
2697 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2698 {
2699   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2700 
2701   PetscFunctionBegin;
2702   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2703   if (f) {
2704     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2705   }
2706   PetscFunctionReturn(0);
2707 }
2708 
2709