xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a58c3bc3391eee32bc3fd94ac7edeea38fe57aae)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
785   PetscInt     *tia, *tja;
786 
787   PetscFunctionBegin;
788   *nn = n;
789   if (!ia) PetscFunctionReturn(0);
790   if (symmetric) {
791     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
792   } else {
793     tia = a->i; tja = a->j;
794   }
795 
796   if (!blockcompressed) {
797     /* malloc & create the natural set of indices */
798     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
799     for (i=0; i<n+1; i++) {
800       for (j=0; j<bs; j++) {
801         *ia[i*bs+j] = tia[i]*bs+j;
802       }
803     }
804     for (i=0; i<nz; i++) {
805       for (j=0; j<bs; j++) {
806         *ja[i*bs+j] = tia[i]*bs+j;
807       }
808     }
809     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
810       ierr = PetscFree(tia);CHKERRQ(ierr);
811       ierr = PetscFree(tja);CHKERRQ(ierr);
812     }
813   } else {
814     *ia = tia;
815     *ja = tja;
816   }
817   if (oshift == 1) {
818     for (i=0; i<nz; i++) (*ja)[i]++;
819     for (i=0; i<n+1; i++) (*ia)[i]++;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
827 {
828   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
829   PetscErrorCode ierr;
830   PetscInt       i,n = a->mbs,nz = a->i[n];
831 
832   PetscFunctionBegin;
833   if (!ia) PetscFunctionReturn(0);
834   if (!blockcompressed) {
835     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
836   }else if (symmetric) {
837     ierr = PetscFree(*ia);CHKERRQ(ierr);
838     ierr = PetscFree(*ja);CHKERRQ(ierr);
839   } else if (oshift == 1) { /* blockcompressed */
840     for (i=0; i<nz; i++) a->j[i]--;
841     for (i=0; i<n+1; i++) a->i[i]--;
842   }
843   PetscFunctionReturn(0);
844 }
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDestroy_SeqBAIJ"
848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
849 {
850   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
851   PetscErrorCode ierr;
852 
853   PetscFunctionBegin;
854 #if defined(PETSC_USE_LOG)
855   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
856 #endif
857   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
858   if (a->row) {
859     ierr = ISDestroy(a->row);CHKERRQ(ierr);
860   }
861   if (a->col) {
862     ierr = ISDestroy(a->col);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(a->diag);CHKERRQ(ierr);
865   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
866   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
867   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
868   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
869   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
870   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
871 #if defined(PETSC_USE_MAT_SINGLE)
872   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
873 #endif
874   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
875   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
876 
877   ierr = PetscFree(a);CHKERRQ(ierr);
878 
879   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
880   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
881   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
882   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
883   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
884   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
885   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
886   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatSetOption_SeqBAIJ"
892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
893 {
894   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   switch (op) {
899   case MAT_ROW_ORIENTED:
900     a->roworiented    = PETSC_TRUE;
901     break;
902   case MAT_COLUMN_ORIENTED:
903     a->roworiented    = PETSC_FALSE;
904     break;
905   case MAT_COLUMNS_SORTED:
906     a->sorted         = PETSC_TRUE;
907     break;
908   case MAT_COLUMNS_UNSORTED:
909     a->sorted         = PETSC_FALSE;
910     break;
911   case MAT_KEEP_ZEROED_ROWS:
912     a->keepzeroedrows = PETSC_TRUE;
913     break;
914   case MAT_NO_NEW_NONZERO_LOCATIONS:
915     a->nonew          = 1;
916     break;
917   case MAT_NEW_NONZERO_LOCATION_ERR:
918     a->nonew          = -1;
919     break;
920   case MAT_NEW_NONZERO_ALLOCATION_ERR:
921     a->nonew          = -2;
922     break;
923   case MAT_YES_NEW_NONZERO_LOCATIONS:
924     a->nonew          = 0;
925     break;
926   case MAT_ROWS_SORTED:
927   case MAT_ROWS_UNSORTED:
928   case MAT_YES_NEW_DIAGONALS:
929   case MAT_IGNORE_OFF_PROC_ENTRIES:
930   case MAT_USE_HASH_TABLE:
931     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
932     break;
933   case MAT_NO_NEW_DIAGONALS:
934     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
935   case MAT_SYMMETRIC:
936   case MAT_STRUCTURALLY_SYMMETRIC:
937   case MAT_NOT_SYMMETRIC:
938   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
939   case MAT_HERMITIAN:
940   case MAT_NOT_HERMITIAN:
941   case MAT_SYMMETRY_ETERNAL:
942   case MAT_NOT_SYMMETRY_ETERNAL:
943     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
944     break;
945   default:
946     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatGetRow_SeqBAIJ"
953 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
954 {
955   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
958   MatScalar      *aa,*aa_i;
959   PetscScalar    *v_i;
960 
961   PetscFunctionBegin;
962   bs  = A->rmap.bs;
963   ai  = a->i;
964   aj  = a->j;
965   aa  = a->a;
966   bs2 = a->bs2;
967 
968   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
969 
970   bn  = row/bs;   /* Block number */
971   bp  = row % bs; /* Block Position */
972   M   = ai[bn+1] - ai[bn];
973   *nz = bs*M;
974 
975   if (v) {
976     *v = 0;
977     if (*nz) {
978       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
979       for (i=0; i<M; i++) { /* for each block in the block row */
980         v_i  = *v + i*bs;
981         aa_i = aa + bs2*(ai[bn] + i);
982         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
983       }
984     }
985   }
986 
987   if (idx) {
988     *idx = 0;
989     if (*nz) {
990       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
991       for (i=0; i<M; i++) { /* for each block in the block row */
992         idx_i = *idx + i*bs;
993         itmp  = bs*aj[ai[bn] + i];
994         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
995       }
996     }
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1003 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1004 {
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1009   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1015 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1016 {
1017   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1018   Mat            C;
1019   PetscErrorCode ierr;
1020   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1021   PetscInt       *rows,*cols,bs2=a->bs2;
1022   PetscScalar    *array;
1023 
1024   PetscFunctionBegin;
1025   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1026   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1027   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1028 
1029 #if defined(PETSC_USE_MAT_SINGLE)
1030   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1031   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1032 #else
1033   array = a->a;
1034 #endif
1035 
1036   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1037   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1038   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1039   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1040   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1041   ierr = PetscFree(col);CHKERRQ(ierr);
1042   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1043   cols = rows + bs;
1044   for (i=0; i<mbs; i++) {
1045     cols[0] = i*bs;
1046     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1047     len = ai[i+1] - ai[i];
1048     for (j=0; j<len; j++) {
1049       rows[0] = (*aj++)*bs;
1050       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1051       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1052       array += bs2;
1053     }
1054   }
1055   ierr = PetscFree(rows);CHKERRQ(ierr);
1056 #if defined(PETSC_USE_MAT_SINGLE)
1057   ierr = PetscFree(array);
1058 #endif
1059 
1060   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1062 
1063   if (B) {
1064     *B = C;
1065   } else {
1066     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1073 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1074 {
1075   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1076   PetscErrorCode ierr;
1077   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1078   int            fd;
1079   PetscScalar    *aa;
1080   FILE           *file;
1081 
1082   PetscFunctionBegin;
1083   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1084   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1085   col_lens[0] = MAT_FILE_COOKIE;
1086 
1087   col_lens[1] = A->rmap.N;
1088   col_lens[2] = A->cmap.n;
1089   col_lens[3] = a->nz*bs2;
1090 
1091   /* store lengths of each row and write (including header) to file */
1092   count = 0;
1093   for (i=0; i<a->mbs; i++) {
1094     for (j=0; j<bs; j++) {
1095       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1096     }
1097   }
1098   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1099   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1100 
1101   /* store column indices (zero start index) */
1102   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1103   count = 0;
1104   for (i=0; i<a->mbs; i++) {
1105     for (j=0; j<bs; j++) {
1106       for (k=a->i[i]; k<a->i[i+1]; k++) {
1107         for (l=0; l<bs; l++) {
1108           jj[count++] = bs*a->j[k] + l;
1109         }
1110       }
1111     }
1112   }
1113   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1114   ierr = PetscFree(jj);CHKERRQ(ierr);
1115 
1116   /* store nonzero values */
1117   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1118   count = 0;
1119   for (i=0; i<a->mbs; i++) {
1120     for (j=0; j<bs; j++) {
1121       for (k=a->i[i]; k<a->i[i+1]; k++) {
1122         for (l=0; l<bs; l++) {
1123           aa[count++] = a->a[bs2*k + l*bs + j];
1124         }
1125       }
1126     }
1127   }
1128   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1129   ierr = PetscFree(aa);CHKERRQ(ierr);
1130 
1131   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1132   if (file) {
1133     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1140 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1141 {
1142   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1143   PetscErrorCode    ierr;
1144   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1145   PetscViewerFormat format;
1146 
1147   PetscFunctionBegin;
1148   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1149   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1150     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1151   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1152     Mat aij;
1153     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1154     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1155     ierr = MatDestroy(aij);CHKERRQ(ierr);
1156   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1157      PetscFunctionReturn(0);
1158   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1159     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1160     for (i=0; i<a->mbs; i++) {
1161       for (j=0; j<bs; j++) {
1162         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1163         for (k=a->i[i]; k<a->i[i+1]; k++) {
1164           for (l=0; l<bs; l++) {
1165 #if defined(PETSC_USE_COMPLEX)
1166             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1167               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1168                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1169             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1170               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1171                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1172             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1173               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1174             }
1175 #else
1176             if (a->a[bs2*k + l*bs + j] != 0.0) {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1178             }
1179 #endif
1180           }
1181         }
1182         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1183       }
1184     }
1185     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1186   } else {
1187     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1188     for (i=0; i<a->mbs; i++) {
1189       for (j=0; j<bs; j++) {
1190         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1191         for (k=a->i[i]; k<a->i[i+1]; k++) {
1192           for (l=0; l<bs; l++) {
1193 #if defined(PETSC_USE_COMPLEX)
1194             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1195               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1196                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1197             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1198               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1199                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1200             } else {
1201               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1202             }
1203 #else
1204             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1205 #endif
1206           }
1207         }
1208         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1209       }
1210     }
1211     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1212   }
1213   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1219 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1220 {
1221   Mat            A = (Mat) Aa;
1222   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1223   PetscErrorCode ierr;
1224   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1225   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1226   MatScalar      *aa;
1227   PetscViewer    viewer;
1228 
1229   PetscFunctionBegin;
1230 
1231   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1232   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1233 
1234   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1235 
1236   /* loop over matrix elements drawing boxes */
1237   color = PETSC_DRAW_BLUE;
1238   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1241       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242       aa = a->a + j*bs2;
1243       for (k=0; k<bs; k++) {
1244         for (l=0; l<bs; l++) {
1245           if (PetscRealPart(*aa++) >=  0.) continue;
1246           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250   }
1251   color = PETSC_DRAW_CYAN;
1252   for (i=0,row=0; i<mbs; i++,row+=bs) {
1253     for (j=a->i[i]; j<a->i[i+1]; j++) {
1254       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1255       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1256       aa = a->a + j*bs2;
1257       for (k=0; k<bs; k++) {
1258         for (l=0; l<bs; l++) {
1259           if (PetscRealPart(*aa++) != 0.) continue;
1260           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1261         }
1262       }
1263     }
1264   }
1265 
1266   color = PETSC_DRAW_RED;
1267   for (i=0,row=0; i<mbs; i++,row+=bs) {
1268     for (j=a->i[i]; j<a->i[i+1]; j++) {
1269       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1270       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1271       aa = a->a + j*bs2;
1272       for (k=0; k<bs; k++) {
1273         for (l=0; l<bs; l++) {
1274           if (PetscRealPart(*aa++) <= 0.) continue;
1275           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1276         }
1277       }
1278     }
1279   }
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1285 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1286 {
1287   PetscErrorCode ierr;
1288   PetscReal      xl,yl,xr,yr,w,h;
1289   PetscDraw      draw;
1290   PetscTruth     isnull;
1291 
1292   PetscFunctionBegin;
1293 
1294   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1295   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1296 
1297   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1298   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1299   xr += w;    yr += h;  xl = -w;     yl = -h;
1300   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1301   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1302   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatView_SeqBAIJ"
1308 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1309 {
1310   PetscErrorCode ierr;
1311   PetscTruth     iascii,isbinary,isdraw;
1312 
1313   PetscFunctionBegin;
1314   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1315   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1316   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1317   if (iascii){
1318     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1319   } else if (isbinary) {
1320     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1321   } else if (isdraw) {
1322     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1323   } else {
1324     Mat B;
1325     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1326     ierr = MatView(B,viewer);CHKERRQ(ierr);
1327     ierr = MatDestroy(B);CHKERRQ(ierr);
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1335 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1336 {
1337   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1338   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1339   PetscInt    *ai = a->i,*ailen = a->ilen;
1340   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1341   MatScalar   *ap,*aa = a->a,zero = 0.0;
1342 
1343   PetscFunctionBegin;
1344   for (k=0; k<m; k++) { /* loop over rows */
1345     row  = im[k]; brow = row/bs;
1346     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1347     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1348     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1349     nrow = ailen[brow];
1350     for (l=0; l<n; l++) { /* loop over columns */
1351       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1352       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1353       col  = in[l] ;
1354       bcol = col/bs;
1355       cidx = col%bs;
1356       ridx = row%bs;
1357       high = nrow;
1358       low  = 0; /* assume unsorted */
1359       while (high-low > 5) {
1360         t = (low+high)/2;
1361         if (rp[t] > bcol) high = t;
1362         else             low  = t;
1363       }
1364       for (i=low; i<high; i++) {
1365         if (rp[i] > bcol) break;
1366         if (rp[i] == bcol) {
1367           *v++ = ap[bs2*i+bs*cidx+ridx];
1368           goto finished;
1369         }
1370       }
1371       *v++ = zero;
1372       finished:;
1373     }
1374   }
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #if defined(PETSC_USE_MAT_SINGLE)
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1381 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1382 {
1383   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1384   PetscErrorCode ierr;
1385   PetscInt       i,N = m*n*b->bs2;
1386   MatScalar      *vsingle;
1387 
1388   PetscFunctionBegin;
1389   if (N > b->setvalueslen) {
1390     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1391     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1392     b->setvalueslen  = N;
1393   }
1394   vsingle = b->setvaluescopy;
1395   for (i=0; i<N; i++) {
1396     vsingle[i] = v[i];
1397   }
1398   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 #endif
1402 
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1406 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1407 {
1408   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1409   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1410   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1411   PetscErrorCode  ierr;
1412   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1413   PetscTruth      roworiented=a->roworiented;
1414   const MatScalar *value = v;
1415   MatScalar       *ap,*aa = a->a,*bap;
1416 
1417   PetscFunctionBegin;
1418   if (roworiented) {
1419     stepval = (n-1)*bs;
1420   } else {
1421     stepval = (m-1)*bs;
1422   }
1423   for (k=0; k<m; k++) { /* loop over added rows */
1424     row  = im[k];
1425     if (row < 0) continue;
1426 #if defined(PETSC_USE_DEBUG)
1427     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1428 #endif
1429     rp   = aj + ai[row];
1430     ap   = aa + bs2*ai[row];
1431     rmax = imax[row];
1432     nrow = ailen[row];
1433     low  = 0;
1434     high = nrow;
1435     for (l=0; l<n; l++) { /* loop over added columns */
1436       if (in[l] < 0) continue;
1437 #if defined(PETSC_USE_DEBUG)
1438       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1439 #endif
1440       col = in[l];
1441       if (roworiented) {
1442         value = v + k*(stepval+bs)*bs + l*bs;
1443       } else {
1444         value = v + l*(stepval+bs)*bs + k*bs;
1445       }
1446       if (col <= lastcol) low = 0; else high = nrow;
1447       lastcol = col;
1448       while (high-low > 7) {
1449         t = (low+high)/2;
1450         if (rp[t] > col) high = t;
1451         else             low  = t;
1452       }
1453       for (i=low; i<high; i++) {
1454         if (rp[i] > col) break;
1455         if (rp[i] == col) {
1456           bap  = ap +  bs2*i;
1457           if (roworiented) {
1458             if (is == ADD_VALUES) {
1459               for (ii=0; ii<bs; ii++,value+=stepval) {
1460                 for (jj=ii; jj<bs2; jj+=bs) {
1461                   bap[jj] += *value++;
1462                 }
1463               }
1464             } else {
1465               for (ii=0; ii<bs; ii++,value+=stepval) {
1466                 for (jj=ii; jj<bs2; jj+=bs) {
1467                   bap[jj] = *value++;
1468                 }
1469               }
1470             }
1471           } else {
1472             if (is == ADD_VALUES) {
1473               for (ii=0; ii<bs; ii++,value+=stepval) {
1474                 for (jj=0; jj<bs; jj++) {
1475                   *bap++ += *value++;
1476                 }
1477               }
1478             } else {
1479               for (ii=0; ii<bs; ii++,value+=stepval) {
1480                 for (jj=0; jj<bs; jj++) {
1481                   *bap++  = *value++;
1482                 }
1483               }
1484             }
1485           }
1486           goto noinsert2;
1487         }
1488       }
1489       if (nonew == 1) goto noinsert2;
1490       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1491       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1492       N = nrow++ - 1; high++;
1493       /* shift up all the later entries in this row */
1494       for (ii=N; ii>=i; ii--) {
1495         rp[ii+1] = rp[ii];
1496         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1497       }
1498       if (N >= i) {
1499         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1500       }
1501       rp[i] = col;
1502       bap   = ap +  bs2*i;
1503       if (roworiented) {
1504         for (ii=0; ii<bs; ii++,value+=stepval) {
1505           for (jj=ii; jj<bs2; jj+=bs) {
1506             bap[jj] = *value++;
1507           }
1508         }
1509       } else {
1510         for (ii=0; ii<bs; ii++,value+=stepval) {
1511           for (jj=0; jj<bs; jj++) {
1512             *bap++  = *value++;
1513           }
1514         }
1515       }
1516       noinsert2:;
1517       low = i;
1518     }
1519     ailen[row] = nrow;
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1526 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1527 {
1528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1529   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1530   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1531   PetscErrorCode ierr;
1532   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1533   MatScalar      *aa = a->a,*ap;
1534   PetscReal      ratio=0.6;
1535 
1536   PetscFunctionBegin;
1537   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1538 
1539   if (m) rmax = ailen[0];
1540   for (i=1; i<mbs; i++) {
1541     /* move each row back by the amount of empty slots (fshift) before it*/
1542     fshift += imax[i-1] - ailen[i-1];
1543     rmax   = PetscMax(rmax,ailen[i]);
1544     if (fshift) {
1545       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1546       N = ailen[i];
1547       for (j=0; j<N; j++) {
1548         ip[j-fshift] = ip[j];
1549         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1550       }
1551     }
1552     ai[i] = ai[i-1] + ailen[i-1];
1553   }
1554   if (mbs) {
1555     fshift += imax[mbs-1] - ailen[mbs-1];
1556     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1557   }
1558   /* reset ilen and imax for each row */
1559   for (i=0; i<mbs; i++) {
1560     ailen[i] = imax[i] = ai[i+1] - ai[i];
1561   }
1562   a->nz = ai[mbs];
1563 
1564   /* diagonals may have moved, so kill the diagonal pointers */
1565   a->idiagvalid = PETSC_FALSE;
1566   if (fshift && a->diag) {
1567     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1568     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1569     a->diag = 0;
1570   }
1571   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1572   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1573   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1574   a->reallocs          = 0;
1575   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1576 
1577   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1578   if (a->compressedrow.use){
1579     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1580   }
1581 
1582   A->same_nonzero = PETSC_TRUE;
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 /*
1587    This function returns an array of flags which indicate the locations of contiguous
1588    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1589    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1590    Assume: sizes should be long enough to hold all the values.
1591 */
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1594 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1595 {
1596   PetscInt   i,j,k,row;
1597   PetscTruth flg;
1598 
1599   PetscFunctionBegin;
1600   for (i=0,j=0; i<n; j++) {
1601     row = idx[i];
1602     if (row%bs!=0) { /* Not the begining of a block */
1603       sizes[j] = 1;
1604       i++;
1605     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1606       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1607       i++;
1608     } else { /* Begining of the block, so check if the complete block exists */
1609       flg = PETSC_TRUE;
1610       for (k=1; k<bs; k++) {
1611         if (row+k != idx[i+k]) { /* break in the block */
1612           flg = PETSC_FALSE;
1613           break;
1614         }
1615       }
1616       if (flg) { /* No break in the bs */
1617         sizes[j] = bs;
1618         i+= bs;
1619       } else {
1620         sizes[j] = 1;
1621         i++;
1622       }
1623     }
1624   }
1625   *bs_max = j;
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1631 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1632 {
1633   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1634   PetscErrorCode ierr;
1635   PetscInt       i,j,k,count,*rows;
1636   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1637   PetscScalar    zero = 0.0;
1638   MatScalar      *aa;
1639 
1640   PetscFunctionBegin;
1641   /* Make a copy of the IS and  sort it */
1642   /* allocate memory for rows,sizes */
1643   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1644   sizes = rows + is_n;
1645 
1646   /* copy IS values to rows, and sort them */
1647   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1648   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1649   if (baij->keepzeroedrows) {
1650     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1651     bs_max = is_n;
1652     A->same_nonzero = PETSC_TRUE;
1653   } else {
1654     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1655     A->same_nonzero = PETSC_FALSE;
1656   }
1657 
1658   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1659     row   = rows[j];
1660     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1661     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1662     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1663     if (sizes[i] == bs && !baij->keepzeroedrows) {
1664       if (diag != 0.0) {
1665         if (baij->ilen[row/bs] > 0) {
1666           baij->ilen[row/bs]       = 1;
1667           baij->j[baij->i[row/bs]] = row/bs;
1668           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1669         }
1670         /* Now insert all the diagonal values for this bs */
1671         for (k=0; k<bs; k++) {
1672           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1673         }
1674       } else { /* (diag == 0.0) */
1675         baij->ilen[row/bs] = 0;
1676       } /* end (diag == 0.0) */
1677     } else { /* (sizes[i] != bs) */
1678 #if defined (PETSC_USE_DEBUG)
1679       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1680 #endif
1681       for (k=0; k<count; k++) {
1682         aa[0] =  zero;
1683         aa    += bs;
1684       }
1685       if (diag != 0.0) {
1686         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1687       }
1688     }
1689   }
1690 
1691   ierr = PetscFree(rows);CHKERRQ(ierr);
1692   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1698 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1699 {
1700   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1701   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1702   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1703   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1704   PetscErrorCode ierr;
1705   PetscInt       ridx,cidx,bs2=a->bs2;
1706   PetscTruth     roworiented=a->roworiented;
1707   MatScalar      *ap,value,*aa=a->a,*bap;
1708 
1709   PetscFunctionBegin;
1710   for (k=0; k<m; k++) { /* loop over added rows */
1711     row  = im[k];
1712     brow = row/bs;
1713     if (row < 0) continue;
1714 #if defined(PETSC_USE_DEBUG)
1715     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1716 #endif
1717     rp   = aj + ai[brow];
1718     ap   = aa + bs2*ai[brow];
1719     rmax = imax[brow];
1720     nrow = ailen[brow];
1721     low  = 0;
1722     high = nrow;
1723     for (l=0; l<n; l++) { /* loop over added columns */
1724       if (in[l] < 0) continue;
1725 #if defined(PETSC_USE_DEBUG)
1726       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1727 #endif
1728       col = in[l]; bcol = col/bs;
1729       ridx = row % bs; cidx = col % bs;
1730       if (roworiented) {
1731         value = v[l + k*n];
1732       } else {
1733         value = v[k + l*m];
1734       }
1735       if (col <= lastcol) low = 0; else high = nrow;
1736       lastcol = col;
1737       while (high-low > 7) {
1738         t = (low+high)/2;
1739         if (rp[t] > bcol) high = t;
1740         else              low  = t;
1741       }
1742       for (i=low; i<high; i++) {
1743         if (rp[i] > bcol) break;
1744         if (rp[i] == bcol) {
1745           bap  = ap +  bs2*i + bs*cidx + ridx;
1746           if (is == ADD_VALUES) *bap += value;
1747           else                  *bap  = value;
1748           goto noinsert1;
1749         }
1750       }
1751       if (nonew == 1) goto noinsert1;
1752       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1753       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1754       N = nrow++ - 1; high++;
1755       /* shift up all the later entries in this row */
1756       for (ii=N; ii>=i; ii--) {
1757         rp[ii+1] = rp[ii];
1758         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1759       }
1760       if (N>=i) {
1761         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1762       }
1763       rp[i]                      = bcol;
1764       ap[bs2*i + bs*cidx + ridx] = value;
1765       a->nz++;
1766       noinsert1:;
1767       low = i;
1768     }
1769     ailen[brow] = nrow;
1770   }
1771   A->same_nonzero = PETSC_FALSE;
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1778 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1779 {
1780   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1781   Mat            outA;
1782   PetscErrorCode ierr;
1783   PetscTruth     row_identity,col_identity;
1784 
1785   PetscFunctionBegin;
1786   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1787   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1788   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1789   if (!row_identity || !col_identity) {
1790     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1791   }
1792 
1793   outA          = inA;
1794   inA->factor   = FACTOR_LU;
1795 
1796   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1797 
1798   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1799   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1800   a->row = row;
1801   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1802   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1803   a->col = col;
1804 
1805   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1806   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1807   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1808 
1809   /*
1810       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1811       for ILU(0) factorization with natural ordering
1812   */
1813   if (inA->rmap.bs < 8) {
1814     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1815   } else {
1816     if (!a->solve_work) {
1817       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1818       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1819     }
1820   }
1821 
1822   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1823 
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 EXTERN_C_BEGIN
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1830 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1831 {
1832   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1833   PetscInt    i,nz,nbs;
1834 
1835   PetscFunctionBegin;
1836   nz  = baij->maxnz/baij->bs2;
1837   nbs = baij->nbs;
1838   for (i=0; i<nz; i++) {
1839     baij->j[i] = indices[i];
1840   }
1841   baij->nz = nz;
1842   for (i=0; i<nbs; i++) {
1843     baij->ilen[i] = baij->imax[i];
1844   }
1845 
1846   PetscFunctionReturn(0);
1847 }
1848 EXTERN_C_END
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1852 /*@
1853     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1854        in the matrix.
1855 
1856   Input Parameters:
1857 +  mat - the SeqBAIJ matrix
1858 -  indices - the column indices
1859 
1860   Level: advanced
1861 
1862   Notes:
1863     This can be called if you have precomputed the nonzero structure of the
1864   matrix and want to provide it to the matrix object to improve the performance
1865   of the MatSetValues() operation.
1866 
1867     You MUST have set the correct numbers of nonzeros per row in the call to
1868   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1869 
1870     MUST be called before any calls to MatSetValues();
1871 
1872 @*/
1873 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1874 {
1875   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1876 
1877   PetscFunctionBegin;
1878   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1879   PetscValidPointer(indices,2);
1880   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1881   if (f) {
1882     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1883   } else {
1884     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1891 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1892 {
1893   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1894   PetscErrorCode ierr;
1895   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1896   PetscReal      atmp;
1897   PetscScalar    *x,zero = 0.0;
1898   MatScalar      *aa;
1899   PetscInt       ncols,brow,krow,kcol;
1900 
1901   PetscFunctionBegin;
1902   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1903   bs   = A->rmap.bs;
1904   aa   = a->a;
1905   ai   = a->i;
1906   aj   = a->j;
1907   mbs  = a->mbs;
1908 
1909   ierr = VecSet(v,zero);CHKERRQ(ierr);
1910   if (idx) {
1911     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1912   }
1913   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1914   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1915   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1916   for (i=0; i<mbs; i++) {
1917     ncols = ai[1] - ai[0]; ai++;
1918     brow  = bs*i;
1919     for (j=0; j<ncols; j++){
1920       for (kcol=0; kcol<bs; kcol++){
1921         for (krow=0; krow<bs; krow++){
1922           atmp = PetscAbsScalar(*aa);aa++;
1923           row = brow + krow;    /* row index */
1924           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1925           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1926         }
1927       }
1928       aj++;
1929     }
1930   }
1931   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatCopy_SeqBAIJ"
1937 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1938 {
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   /* If the two matrices have the same copy implementation, use fast copy. */
1943   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1944     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1945     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1946 
1947     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1948       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1949     }
1950     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1951   } else {
1952     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1959 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1970 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1971 {
1972   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1973   PetscFunctionBegin;
1974   *array = a->a;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1980 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1981 {
1982   PetscFunctionBegin;
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 #include "petscblaslapack.h"
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1989 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1990 {
1991   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1992   PetscErrorCode ierr;
1993   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1994   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1995 
1996   PetscFunctionBegin;
1997   if (str == SAME_NONZERO_PATTERN) {
1998     PetscScalar alpha = a;
1999     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2000   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2001     if (y->xtoy && y->XtoY != X) {
2002       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2003       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2004     }
2005     if (!y->xtoy) { /* get xtoy */
2006       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2007       y->XtoY = X;
2008     }
2009     bs2 = bs*bs;
2010     for (i=0; i<x->nz; i++) {
2011       j = 0;
2012       while (j < bs2){
2013         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2014         j++;
2015       }
2016     }
2017     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2018   } else {
2019     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2020   }
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2026 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2027 {
2028   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2029   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2030   PetscScalar    *aa = a->a;
2031 
2032   PetscFunctionBegin;
2033   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2039 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2040 {
2041   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2042   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2043   PetscScalar    *aa = a->a;
2044 
2045   PetscFunctionBegin;
2046   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 
2051 /* -------------------------------------------------------------------*/
2052 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2053        MatGetRow_SeqBAIJ,
2054        MatRestoreRow_SeqBAIJ,
2055        MatMult_SeqBAIJ_N,
2056 /* 4*/ MatMultAdd_SeqBAIJ_N,
2057        MatMultTranspose_SeqBAIJ,
2058        MatMultTransposeAdd_SeqBAIJ,
2059        MatSolve_SeqBAIJ_N,
2060        0,
2061        0,
2062 /*10*/ 0,
2063        MatLUFactor_SeqBAIJ,
2064        0,
2065        0,
2066        MatTranspose_SeqBAIJ,
2067 /*15*/ MatGetInfo_SeqBAIJ,
2068        MatEqual_SeqBAIJ,
2069        MatGetDiagonal_SeqBAIJ,
2070        MatDiagonalScale_SeqBAIJ,
2071        MatNorm_SeqBAIJ,
2072 /*20*/ 0,
2073        MatAssemblyEnd_SeqBAIJ,
2074        0,
2075        MatSetOption_SeqBAIJ,
2076        MatZeroEntries_SeqBAIJ,
2077 /*25*/ MatZeroRows_SeqBAIJ,
2078        MatLUFactorSymbolic_SeqBAIJ,
2079        MatLUFactorNumeric_SeqBAIJ_N,
2080        MatCholeskyFactorSymbolic_SeqBAIJ,
2081        MatCholeskyFactorNumeric_SeqBAIJ_N,
2082 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2083        MatILUFactorSymbolic_SeqBAIJ,
2084        MatICCFactorSymbolic_SeqBAIJ,
2085        MatGetArray_SeqBAIJ,
2086        MatRestoreArray_SeqBAIJ,
2087 /*35*/ MatDuplicate_SeqBAIJ,
2088        0,
2089        0,
2090        MatILUFactor_SeqBAIJ,
2091        0,
2092 /*40*/ MatAXPY_SeqBAIJ,
2093        MatGetSubMatrices_SeqBAIJ,
2094        MatIncreaseOverlap_SeqBAIJ,
2095        MatGetValues_SeqBAIJ,
2096        MatCopy_SeqBAIJ,
2097 /*45*/ 0,
2098        MatScale_SeqBAIJ,
2099        0,
2100        0,
2101        0,
2102 /*50*/ 0,
2103        MatGetRowIJ_SeqBAIJ,
2104        MatRestoreRowIJ_SeqBAIJ,
2105        0,
2106        0,
2107 /*55*/ 0,
2108        0,
2109        0,
2110        0,
2111        MatSetValuesBlocked_SeqBAIJ,
2112 /*60*/ MatGetSubMatrix_SeqBAIJ,
2113        MatDestroy_SeqBAIJ,
2114        MatView_SeqBAIJ,
2115        0,
2116        0,
2117 /*65*/ 0,
2118        0,
2119        0,
2120        0,
2121        0,
2122 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2123        MatConvert_Basic,
2124        0,
2125        0,
2126        0,
2127 /*75*/ 0,
2128        0,
2129        0,
2130        0,
2131        0,
2132 /*80*/ 0,
2133        0,
2134        0,
2135        0,
2136        MatLoad_SeqBAIJ,
2137 /*85*/ 0,
2138        0,
2139        0,
2140        0,
2141        0,
2142 /*90*/ 0,
2143        0,
2144        0,
2145        0,
2146        0,
2147 /*95*/ 0,
2148        0,
2149        0,
2150        0,
2151        0,
2152 /*100*/0,
2153        0,
2154        0,
2155        0,
2156        0,
2157 /*105*/0,
2158        MatRealPart_SeqBAIJ,
2159        MatImaginaryPart_SeqBAIJ
2160 };
2161 
2162 EXTERN_C_BEGIN
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2165 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2166 {
2167   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2168   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   if (aij->nonew != 1) {
2173     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2174   }
2175 
2176   /* allocate space for values if not already there */
2177   if (!aij->saved_values) {
2178     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2179   }
2180 
2181   /* copy values over */
2182   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 EXTERN_C_END
2186 
2187 EXTERN_C_BEGIN
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2190 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2191 {
2192   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2193   PetscErrorCode ierr;
2194   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2195 
2196   PetscFunctionBegin;
2197   if (aij->nonew != 1) {
2198     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2199   }
2200   if (!aij->saved_values) {
2201     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2202   }
2203 
2204   /* copy values over */
2205   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2206   PetscFunctionReturn(0);
2207 }
2208 EXTERN_C_END
2209 
2210 EXTERN_C_BEGIN
2211 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2212 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2213 EXTERN_C_END
2214 
2215 EXTERN_C_BEGIN
2216 #undef __FUNCT__
2217 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2218 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2219 {
2220   Mat_SeqBAIJ    *b;
2221   PetscErrorCode ierr;
2222   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2223   PetscTruth     flg,skipallocation = PETSC_FALSE;
2224 
2225   PetscFunctionBegin;
2226 
2227   if (nz == MAT_SKIP_ALLOCATION) {
2228     skipallocation = PETSC_TRUE;
2229     nz             = 0;
2230   }
2231 
2232   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2233     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2234   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2235 
2236   if (nnz && newbs != bs) {
2237     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2238   }
2239   bs   = newbs;
2240 
2241   B->rmap.bs = B->cmap.bs = bs;
2242   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2243   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2244 
2245   B->preallocated = PETSC_TRUE;
2246 
2247   mbs  = B->rmap.n/bs;
2248   nbs  = B->cmap.n/bs;
2249   bs2  = bs*bs;
2250 
2251   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2252     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2253   }
2254 
2255   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2256   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2257   if (nnz) {
2258     for (i=0; i<mbs; i++) {
2259       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2260       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2261     }
2262   }
2263 
2264   b       = (Mat_SeqBAIJ*)B->data;
2265   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2266     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2267   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2268 
2269   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2270   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2271   if (!flg) {
2272     switch (bs) {
2273     case 1:
2274       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2275       B->ops->mult            = MatMult_SeqBAIJ_1;
2276       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2277       break;
2278     case 2:
2279       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2280       B->ops->mult            = MatMult_SeqBAIJ_2;
2281       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2282       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2283       break;
2284     case 3:
2285       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2286       B->ops->mult            = MatMult_SeqBAIJ_3;
2287       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2288       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2289       break;
2290     case 4:
2291       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2292       B->ops->mult            = MatMult_SeqBAIJ_4;
2293       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2294       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2295       break;
2296     case 5:
2297       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2298       B->ops->mult            = MatMult_SeqBAIJ_5;
2299       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2300       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2301       break;
2302     case 6:
2303       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2304       B->ops->mult            = MatMult_SeqBAIJ_6;
2305       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2306       break;
2307     case 7:
2308       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2309       B->ops->mult            = MatMult_SeqBAIJ_7;
2310       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2311       break;
2312     default:
2313       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2314       B->ops->mult            = MatMult_SeqBAIJ_N;
2315       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2316       break;
2317     }
2318   }
2319   B->rmap.bs      = bs;
2320   b->mbs     = mbs;
2321   b->nbs     = nbs;
2322   if (!skipallocation) {
2323     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2324     /* b->ilen will count nonzeros in each block row so far. */
2325     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2326     if (!nnz) {
2327       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2328       else if (nz <= 0)        nz = 1;
2329       for (i=0; i<mbs; i++) b->imax[i] = nz;
2330       nz = nz*mbs;
2331     } else {
2332       nz = 0;
2333       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2334     }
2335 
2336     /* allocate the matrix space */
2337     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2338     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2339     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2340     b->singlemalloc = PETSC_TRUE;
2341 
2342     b->i[0] = 0;
2343     for (i=1; i<mbs+1; i++) {
2344       b->i[i] = b->i[i-1] + b->imax[i-1];
2345     }
2346     b->free_a     = PETSC_TRUE;
2347     b->free_ij    = PETSC_TRUE;
2348   } else {
2349     b->free_a     = PETSC_FALSE;
2350     b->free_ij    = PETSC_FALSE;
2351   }
2352 
2353   B->rmap.bs          = bs;
2354   b->bs2              = bs2;
2355   b->mbs              = mbs;
2356   b->nz               = 0;
2357   b->maxnz            = nz*bs2;
2358   B->info.nz_unneeded = (PetscReal)b->maxnz;
2359   PetscFunctionReturn(0);
2360 }
2361 EXTERN_C_END
2362 
2363 /*MC
2364    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2365    block sparse compressed row format.
2366 
2367    Options Database Keys:
2368 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2369 
2370   Level: beginner
2371 
2372 .seealso: MatCreateSeqBAIJ()
2373 M*/
2374 
2375 EXTERN_C_BEGIN
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatCreate_SeqBAIJ"
2378 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2379 {
2380   PetscErrorCode ierr;
2381   PetscMPIInt    size;
2382   Mat_SeqBAIJ    *b;
2383 
2384   PetscFunctionBegin;
2385   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2386   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2387 
2388   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2389   B->data = (void*)b;
2390   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2391   B->factor           = 0;
2392   B->mapping          = 0;
2393   b->row              = 0;
2394   b->col              = 0;
2395   b->icol             = 0;
2396   b->reallocs         = 0;
2397   b->saved_values     = 0;
2398 #if defined(PETSC_USE_MAT_SINGLE)
2399   b->setvalueslen     = 0;
2400   b->setvaluescopy    = PETSC_NULL;
2401 #endif
2402 
2403   b->sorted           = PETSC_FALSE;
2404   b->roworiented      = PETSC_TRUE;
2405   b->nonew            = 0;
2406   b->diag             = 0;
2407   b->solve_work       = 0;
2408   b->mult_work        = 0;
2409   B->spptr            = 0;
2410   B->info.nz_unneeded = (PetscReal)b->maxnz;
2411   b->keepzeroedrows   = PETSC_FALSE;
2412   b->xtoy              = 0;
2413   b->XtoY              = 0;
2414   b->compressedrow.use     = PETSC_FALSE;
2415   b->compressedrow.nrows   = 0;
2416   b->compressedrow.i       = PETSC_NULL;
2417   b->compressedrow.rindex  = PETSC_NULL;
2418   b->compressedrow.checked = PETSC_FALSE;
2419   B->same_nonzero          = PETSC_FALSE;
2420 
2421   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2422                                      "MatInvertBlockDiagonal_SeqBAIJ",
2423                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2424   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2425                                      "MatStoreValues_SeqBAIJ",
2426                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2427   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2428                                      "MatRetrieveValues_SeqBAIJ",
2429                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2430   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2431                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2432                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2433   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2434                                      "MatConvert_SeqBAIJ_SeqAIJ",
2435                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2436   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2437                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2438                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2439   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2440                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2441                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2442   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 EXTERN_C_END
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2449 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2450 {
2451   Mat            C;
2452   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2453   PetscErrorCode ierr;
2454   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2455 
2456   PetscFunctionBegin;
2457   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2458 
2459   *B = 0;
2460   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2461   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2462   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2463   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2464   c    = (Mat_SeqBAIJ*)C->data;
2465 
2466   C->rmap.N   = A->rmap.N;
2467   C->cmap.N   = A->cmap.N;
2468   C->rmap.bs  = A->rmap.bs;
2469   c->bs2 = a->bs2;
2470   c->mbs = a->mbs;
2471   c->nbs = a->nbs;
2472 
2473   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2474   for (i=0; i<mbs; i++) {
2475     c->imax[i] = a->imax[i];
2476     c->ilen[i] = a->ilen[i];
2477   }
2478 
2479   /* allocate the matrix space */
2480   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2481   c->singlemalloc = PETSC_TRUE;
2482   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2483   if (mbs > 0) {
2484     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2485     if (cpvalues == MAT_COPY_VALUES) {
2486       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2487     } else {
2488       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2489     }
2490   }
2491   c->sorted      = a->sorted;
2492   c->roworiented = a->roworiented;
2493   c->nonew       = a->nonew;
2494 
2495   if (a->diag) {
2496     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2497     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2498     for (i=0; i<mbs; i++) {
2499       c->diag[i] = a->diag[i];
2500     }
2501   } else c->diag        = 0;
2502   c->nz                 = a->nz;
2503   c->maxnz              = a->maxnz;
2504   c->solve_work         = 0;
2505   c->mult_work          = 0;
2506   c->free_a             = PETSC_TRUE;
2507   c->free_ij            = PETSC_TRUE;
2508   C->preallocated       = PETSC_TRUE;
2509   C->assembled          = PETSC_TRUE;
2510 
2511   c->compressedrow.use     = a->compressedrow.use;
2512   c->compressedrow.nrows   = a->compressedrow.nrows;
2513   c->compressedrow.checked = a->compressedrow.checked;
2514   if ( a->compressedrow.checked && a->compressedrow.use){
2515     i = a->compressedrow.nrows;
2516     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2517     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2518     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2519     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2520   } else {
2521     c->compressedrow.use    = PETSC_FALSE;
2522     c->compressedrow.i      = PETSC_NULL;
2523     c->compressedrow.rindex = PETSC_NULL;
2524   }
2525   C->same_nonzero = A->same_nonzero;
2526   *B = C;
2527   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 #undef __FUNCT__
2532 #define __FUNCT__ "MatLoad_SeqBAIJ"
2533 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2534 {
2535   Mat_SeqBAIJ    *a;
2536   Mat            B;
2537   PetscErrorCode ierr;
2538   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2539   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2540   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2541   PetscInt       *masked,nmask,tmp,bs2,ishift;
2542   PetscMPIInt    size;
2543   int            fd;
2544   PetscScalar    *aa;
2545   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2546 
2547   PetscFunctionBegin;
2548   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2549     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2550   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2551   bs2  = bs*bs;
2552 
2553   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2554   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2555   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2556   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2557   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2558   M = header[1]; N = header[2]; nz = header[3];
2559 
2560   if (header[3] < 0) {
2561     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2562   }
2563 
2564   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2565 
2566   /*
2567      This code adds extra rows to make sure the number of rows is
2568     divisible by the blocksize
2569   */
2570   mbs        = M/bs;
2571   extra_rows = bs - M + bs*(mbs);
2572   if (extra_rows == bs) extra_rows = 0;
2573   else                  mbs++;
2574   if (extra_rows) {
2575     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2576   }
2577 
2578   /* read in row lengths */
2579   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2580   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2581   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2582 
2583   /* read in column indices */
2584   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2585   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2586   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2587 
2588   /* loop over row lengths determining block row lengths */
2589   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2590   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2591   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2592   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2593   masked   = mask + mbs;
2594   rowcount = 0; nzcount = 0;
2595   for (i=0; i<mbs; i++) {
2596     nmask = 0;
2597     for (j=0; j<bs; j++) {
2598       kmax = rowlengths[rowcount];
2599       for (k=0; k<kmax; k++) {
2600         tmp = jj[nzcount++]/bs;
2601         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2602       }
2603       rowcount++;
2604     }
2605     browlengths[i] += nmask;
2606     /* zero out the mask elements we set */
2607     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2608   }
2609 
2610   /* create our matrix */
2611   ierr = MatCreate(comm,&B);
2612   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2613   ierr = MatSetType(B,type);CHKERRQ(ierr);
2614   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2615   a = (Mat_SeqBAIJ*)B->data;
2616 
2617   /* set matrix "i" values */
2618   a->i[0] = 0;
2619   for (i=1; i<= mbs; i++) {
2620     a->i[i]      = a->i[i-1] + browlengths[i-1];
2621     a->ilen[i-1] = browlengths[i-1];
2622   }
2623   a->nz         = 0;
2624   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2625 
2626   /* read in nonzero values */
2627   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2628   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2629   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2630 
2631   /* set "a" and "j" values into matrix */
2632   nzcount = 0; jcount = 0;
2633   for (i=0; i<mbs; i++) {
2634     nzcountb = nzcount;
2635     nmask    = 0;
2636     for (j=0; j<bs; j++) {
2637       kmax = rowlengths[i*bs+j];
2638       for (k=0; k<kmax; k++) {
2639         tmp = jj[nzcount++]/bs;
2640 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2641       }
2642     }
2643     /* sort the masked values */
2644     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2645 
2646     /* set "j" values into matrix */
2647     maskcount = 1;
2648     for (j=0; j<nmask; j++) {
2649       a->j[jcount++]  = masked[j];
2650       mask[masked[j]] = maskcount++;
2651     }
2652     /* set "a" values into matrix */
2653     ishift = bs2*a->i[i];
2654     for (j=0; j<bs; j++) {
2655       kmax = rowlengths[i*bs+j];
2656       for (k=0; k<kmax; k++) {
2657         tmp       = jj[nzcountb]/bs ;
2658         block     = mask[tmp] - 1;
2659         point     = jj[nzcountb] - bs*tmp;
2660         idx       = ishift + bs2*block + j + bs*point;
2661         a->a[idx] = (MatScalar)aa[nzcountb++];
2662       }
2663     }
2664     /* zero out the mask elements we set */
2665     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2666   }
2667   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2668 
2669   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2670   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2671   ierr = PetscFree(aa);CHKERRQ(ierr);
2672   ierr = PetscFree(jj);CHKERRQ(ierr);
2673   ierr = PetscFree(mask);CHKERRQ(ierr);
2674 
2675   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2676   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2677   ierr = MatView_Private(B);CHKERRQ(ierr);
2678 
2679   *A = B;
2680   PetscFunctionReturn(0);
2681 }
2682 
2683 #undef __FUNCT__
2684 #define __FUNCT__ "MatCreateSeqBAIJ"
2685 /*@C
2686    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2687    compressed row) format.  For good matrix assembly performance the
2688    user should preallocate the matrix storage by setting the parameter nz
2689    (or the array nnz).  By setting these parameters accurately, performance
2690    during matrix assembly can be increased by more than a factor of 50.
2691 
2692    Collective on MPI_Comm
2693 
2694    Input Parameters:
2695 +  comm - MPI communicator, set to PETSC_COMM_SELF
2696 .  bs - size of block
2697 .  m - number of rows
2698 .  n - number of columns
2699 .  nz - number of nonzero blocks  per block row (same for all rows)
2700 -  nnz - array containing the number of nonzero blocks in the various block rows
2701          (possibly different for each block row) or PETSC_NULL
2702 
2703    Output Parameter:
2704 .  A - the matrix
2705 
2706    Options Database Keys:
2707 .   -mat_no_unroll - uses code that does not unroll the loops in the
2708                      block calculations (much slower)
2709 .    -mat_block_size - size of the blocks to use
2710 
2711    Level: intermediate
2712 
2713    Notes:
2714    The number of rows and columns must be divisible by blocksize.
2715 
2716    If the nnz parameter is given then the nz parameter is ignored
2717 
2718    A nonzero block is any block that as 1 or more nonzeros in it
2719 
2720    The block AIJ format is fully compatible with standard Fortran 77
2721    storage.  That is, the stored row and column indices can begin at
2722    either one (as in Fortran) or zero.  See the users' manual for details.
2723 
2724    Specify the preallocated storage with either nz or nnz (not both).
2725    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2726    allocation.  For additional details, see the users manual chapter on
2727    matrices.
2728 
2729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2730 @*/
2731 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2732 {
2733   PetscErrorCode ierr;
2734 
2735   PetscFunctionBegin;
2736   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2737   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2738   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2739   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2745 /*@C
2746    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2747    per row in the matrix. For good matrix assembly performance the
2748    user should preallocate the matrix storage by setting the parameter nz
2749    (or the array nnz).  By setting these parameters accurately, performance
2750    during matrix assembly can be increased by more than a factor of 50.
2751 
2752    Collective on MPI_Comm
2753 
2754    Input Parameters:
2755 +  A - the matrix
2756 .  bs - size of block
2757 .  nz - number of block nonzeros per block row (same for all rows)
2758 -  nnz - array containing the number of block nonzeros in the various block rows
2759          (possibly different for each block row) or PETSC_NULL
2760 
2761    Options Database Keys:
2762 .   -mat_no_unroll - uses code that does not unroll the loops in the
2763                      block calculations (much slower)
2764 .    -mat_block_size - size of the blocks to use
2765 
2766    Level: intermediate
2767 
2768    Notes:
2769    If the nnz parameter is given then the nz parameter is ignored
2770 
2771    The block AIJ format is fully compatible with standard Fortran 77
2772    storage.  That is, the stored row and column indices can begin at
2773    either one (as in Fortran) or zero.  See the users' manual for details.
2774 
2775    Specify the preallocated storage with either nz or nnz (not both).
2776    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2777    allocation.  For additional details, see the users manual chapter on
2778    matrices.
2779 
2780 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2781 @*/
2782 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2783 {
2784   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2785 
2786   PetscFunctionBegin;
2787   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2788   if (f) {
2789     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2790   }
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2796 /*@
2797      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2798               (upper triangular entries in CSR format) provided by the user.
2799 
2800      Collective on MPI_Comm
2801 
2802    Input Parameters:
2803 +  comm - must be an MPI communicator of size 1
2804 .  bs - size of block
2805 .  m - number of rows
2806 .  n - number of columns
2807 .  i - row indices
2808 .  j - column indices
2809 -  a - matrix values
2810 
2811    Output Parameter:
2812 .  mat - the matrix
2813 
2814    Level: intermediate
2815 
2816    Notes:
2817        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2818     once the matrix is destroyed
2819 
2820        You cannot set new nonzero locations into this matrix, that will generate an error.
2821 
2822        The i and j indices are 0 based
2823 
2824 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2825 
2826 @*/
2827 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2828 {
2829   PetscErrorCode ierr;
2830   PetscInt       ii;
2831   Mat_SeqBAIJ    *baij;
2832 
2833   PetscFunctionBegin;
2834   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2835   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2836 
2837   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2838   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2839   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2840   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2841   baij = (Mat_SeqBAIJ*)(*mat)->data;
2842   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2843 
2844   baij->i = i;
2845   baij->j = j;
2846   baij->a = a;
2847   baij->singlemalloc = PETSC_FALSE;
2848   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2849   baij->free_a       = PETSC_FALSE;
2850   baij->free_ij       = PETSC_FALSE;
2851 
2852   for (ii=0; ii<m; ii++) {
2853     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2854 #if defined(PETSC_USE_DEBUG)
2855     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2856 #endif
2857   }
2858 #if defined(PETSC_USE_DEBUG)
2859   for (ii=0; ii<baij->i[m]; ii++) {
2860     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2861     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2862   }
2863 #endif
2864 
2865   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2866   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869