xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0e5e90baed20fcf66590e8d3abe9cacb3e2155d0)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
831   if (a->row) {
832     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833   }
834   if (a->col) {
835     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836   }
837   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
840   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
841   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
842   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
843   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
844 #if defined(PETSC_USE_MAT_SINGLE)
845   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
846 #endif
847   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
848   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
849 
850   ierr = PetscFree(a);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatSetOption_SeqBAIJ"
864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
865 {
866   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   switch (op) {
871   case MAT_ROW_ORIENTED:
872     a->roworiented    = PETSC_TRUE;
873     break;
874   case MAT_COLUMN_ORIENTED:
875     a->roworiented    = PETSC_FALSE;
876     break;
877   case MAT_COLUMNS_SORTED:
878     a->sorted         = PETSC_TRUE;
879     break;
880   case MAT_COLUMNS_UNSORTED:
881     a->sorted         = PETSC_FALSE;
882     break;
883   case MAT_KEEP_ZEROED_ROWS:
884     a->keepzeroedrows = PETSC_TRUE;
885     break;
886   case MAT_NO_NEW_NONZERO_LOCATIONS:
887     a->nonew          = 1;
888     break;
889   case MAT_NEW_NONZERO_LOCATION_ERR:
890     a->nonew          = -1;
891     break;
892   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893     a->nonew          = -2;
894     break;
895   case MAT_YES_NEW_NONZERO_LOCATIONS:
896     a->nonew          = 0;
897     break;
898   case MAT_ROWS_SORTED:
899   case MAT_ROWS_UNSORTED:
900   case MAT_YES_NEW_DIAGONALS:
901   case MAT_IGNORE_OFF_PROC_ENTRIES:
902   case MAT_USE_HASH_TABLE:
903     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
904     break;
905   case MAT_NO_NEW_DIAGONALS:
906     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
907   case MAT_SYMMETRIC:
908   case MAT_STRUCTURALLY_SYMMETRIC:
909   case MAT_NOT_SYMMETRIC:
910   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
911   case MAT_HERMITIAN:
912   case MAT_NOT_HERMITIAN:
913   case MAT_SYMMETRY_ETERNAL:
914   case MAT_NOT_SYMMETRY_ETERNAL:
915     break;
916   default:
917     SETERRQ(PETSC_ERR_SUP,"unknown option");
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatGetRow_SeqBAIJ"
924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
925 {
926   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
927   PetscErrorCode ierr;
928   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
929   MatScalar      *aa,*aa_i;
930   PetscScalar    *v_i;
931 
932   PetscFunctionBegin;
933   bs  = A->bs;
934   ai  = a->i;
935   aj  = a->j;
936   aa  = a->a;
937   bs2 = a->bs2;
938 
939   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
940 
941   bn  = row/bs;   /* Block number */
942   bp  = row % bs; /* Block Position */
943   M   = ai[bn+1] - ai[bn];
944   *nz = bs*M;
945 
946   if (v) {
947     *v = 0;
948     if (*nz) {
949       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
950       for (i=0; i<M; i++) { /* for each block in the block row */
951         v_i  = *v + i*bs;
952         aa_i = aa + bs2*(ai[bn] + i);
953         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
954       }
955     }
956   }
957 
958   if (idx) {
959     *idx = 0;
960     if (*nz) {
961       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
962       for (i=0; i<M; i++) { /* for each block in the block row */
963         idx_i = *idx + i*bs;
964         itmp  = bs*aj[ai[bn] + i];
965         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
966       }
967     }
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
975 {
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
980   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatTranspose_SeqBAIJ"
986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
987 {
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
989   Mat            C;
990   PetscErrorCode ierr;
991   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
992   PetscInt       *rows,*cols,bs2=a->bs2;
993   PetscScalar    *array;
994 
995   PetscFunctionBegin;
996   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
997   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
998   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
999 
1000 #if defined(PETSC_USE_MAT_SINGLE)
1001   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1002   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1003 #else
1004   array = a->a;
1005 #endif
1006 
1007   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1008   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1009   ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr);
1010   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1011   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1012   ierr = PetscFree(col);CHKERRQ(ierr);
1013   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1014   cols = rows + bs;
1015   for (i=0; i<mbs; i++) {
1016     cols[0] = i*bs;
1017     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1018     len = ai[i+1] - ai[i];
1019     for (j=0; j<len; j++) {
1020       rows[0] = (*aj++)*bs;
1021       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1022       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1023       array += bs2;
1024     }
1025   }
1026   ierr = PetscFree(rows);CHKERRQ(ierr);
1027 #if defined(PETSC_USE_MAT_SINGLE)
1028   ierr = PetscFree(array);
1029 #endif
1030 
1031   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1033 
1034   if (B) {
1035     *B = C;
1036   } else {
1037     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1044 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1045 {
1046   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1047   PetscErrorCode ierr;
1048   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1049   int            fd;
1050   PetscScalar    *aa;
1051   FILE           *file;
1052 
1053   PetscFunctionBegin;
1054   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1055   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1056   col_lens[0] = MAT_FILE_COOKIE;
1057 
1058   col_lens[1] = A->m;
1059   col_lens[2] = A->n;
1060   col_lens[3] = a->nz*bs2;
1061 
1062   /* store lengths of each row and write (including header) to file */
1063   count = 0;
1064   for (i=0; i<a->mbs; i++) {
1065     for (j=0; j<bs; j++) {
1066       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1067     }
1068   }
1069   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1070   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1071 
1072   /* store column indices (zero start index) */
1073   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1074   count = 0;
1075   for (i=0; i<a->mbs; i++) {
1076     for (j=0; j<bs; j++) {
1077       for (k=a->i[i]; k<a->i[i+1]; k++) {
1078         for (l=0; l<bs; l++) {
1079           jj[count++] = bs*a->j[k] + l;
1080         }
1081       }
1082     }
1083   }
1084   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1085   ierr = PetscFree(jj);CHKERRQ(ierr);
1086 
1087   /* store nonzero values */
1088   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1089   count = 0;
1090   for (i=0; i<a->mbs; i++) {
1091     for (j=0; j<bs; j++) {
1092       for (k=a->i[i]; k<a->i[i+1]; k++) {
1093         for (l=0; l<bs; l++) {
1094           aa[count++] = a->a[bs2*k + l*bs + j];
1095         }
1096       }
1097     }
1098   }
1099   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1100   ierr = PetscFree(aa);CHKERRQ(ierr);
1101 
1102   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1103   if (file) {
1104     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1111 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1112 {
1113   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1114   PetscErrorCode    ierr;
1115   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1116   PetscViewerFormat format;
1117 
1118   PetscFunctionBegin;
1119   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1120   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1121     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1122   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1123     Mat aij;
1124     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1125     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1126     ierr = MatDestroy(aij);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1128      PetscFunctionReturn(0);
1129   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1130     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1131     for (i=0; i<a->mbs; i++) {
1132       for (j=0; j<bs; j++) {
1133         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1134         for (k=a->i[i]; k<a->i[i+1]; k++) {
1135           for (l=0; l<bs; l++) {
1136 #if defined(PETSC_USE_COMPLEX)
1137             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1138               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1139                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1140             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1141               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1142                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1143             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             }
1146 #else
1147             if (a->a[bs2*k + l*bs + j] != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1149             }
1150 #endif
1151           }
1152         }
1153         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1154       }
1155     }
1156     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1159     for (i=0; i<a->mbs; i++) {
1160       for (j=0; j<bs; j++) {
1161         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1162         for (k=a->i[i]; k<a->i[i+1]; k++) {
1163           for (l=0; l<bs; l++) {
1164 #if defined(PETSC_USE_COMPLEX)
1165             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1166               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1167                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1168             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1169               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1170                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1171             } else {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             }
1174 #else
1175             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1176 #endif
1177           }
1178         }
1179         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1180       }
1181     }
1182     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1183   }
1184   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1190 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1191 {
1192   Mat            A = (Mat) Aa;
1193   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1194   PetscErrorCode ierr;
1195   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1196   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1197   MatScalar      *aa;
1198   PetscViewer    viewer;
1199 
1200   PetscFunctionBegin;
1201 
1202   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1203   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1204 
1205   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1206 
1207   /* loop over matrix elements drawing boxes */
1208   color = PETSC_DRAW_BLUE;
1209   for (i=0,row=0; i<mbs; i++,row+=bs) {
1210     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1212       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1213       aa = a->a + j*bs2;
1214       for (k=0; k<bs; k++) {
1215         for (l=0; l<bs; l++) {
1216           if (PetscRealPart(*aa++) >=  0.) continue;
1217           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1218         }
1219       }
1220     }
1221   }
1222   color = PETSC_DRAW_CYAN;
1223   for (i=0,row=0; i<mbs; i++,row+=bs) {
1224     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1226       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1227       aa = a->a + j*bs2;
1228       for (k=0; k<bs; k++) {
1229         for (l=0; l<bs; l++) {
1230           if (PetscRealPart(*aa++) != 0.) continue;
1231           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1232         }
1233       }
1234     }
1235   }
1236 
1237   color = PETSC_DRAW_RED;
1238   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1241       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242       aa = a->a + j*bs2;
1243       for (k=0; k<bs; k++) {
1244         for (l=0; l<bs; l++) {
1245           if (PetscRealPart(*aa++) <= 0.) continue;
1246           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1256 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1257 {
1258   PetscErrorCode ierr;
1259   PetscReal      xl,yl,xr,yr,w,h;
1260   PetscDraw      draw;
1261   PetscTruth     isnull;
1262 
1263   PetscFunctionBegin;
1264 
1265   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1266   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1267 
1268   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1269   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1270   xr += w;    yr += h;  xl = -w;     yl = -h;
1271   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1272   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatView_SeqBAIJ"
1279 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1280 {
1281   PetscErrorCode ierr;
1282   PetscTruth     iascii,isbinary,isdraw;
1283 
1284   PetscFunctionBegin;
1285   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1288   if (iascii){
1289     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1290   } else if (isbinary) {
1291     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1292   } else if (isdraw) {
1293     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1294   } else {
1295     Mat B;
1296     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1297     ierr = MatView(B,viewer);CHKERRQ(ierr);
1298     ierr = MatDestroy(B);CHKERRQ(ierr);
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1306 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1307 {
1308   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1309   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1310   PetscInt    *ai = a->i,*ailen = a->ilen;
1311   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1312   MatScalar   *ap,*aa = a->a,zero = 0.0;
1313 
1314   PetscFunctionBegin;
1315   for (k=0; k<m; k++) { /* loop over rows */
1316     row  = im[k]; brow = row/bs;
1317     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1318     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1319     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1320     nrow = ailen[brow];
1321     for (l=0; l<n; l++) { /* loop over columns */
1322       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1323       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1324       col  = in[l] ;
1325       bcol = col/bs;
1326       cidx = col%bs;
1327       ridx = row%bs;
1328       high = nrow;
1329       low  = 0; /* assume unsorted */
1330       while (high-low > 5) {
1331         t = (low+high)/2;
1332         if (rp[t] > bcol) high = t;
1333         else             low  = t;
1334       }
1335       for (i=low; i<high; i++) {
1336         if (rp[i] > bcol) break;
1337         if (rp[i] == bcol) {
1338           *v++ = ap[bs2*i+bs*cidx+ridx];
1339           goto finished;
1340         }
1341       }
1342       *v++ = zero;
1343       finished:;
1344     }
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #if defined(PETSC_USE_MAT_SINGLE)
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1352 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1353 {
1354   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1355   PetscErrorCode ierr;
1356   PetscInt       i,N = m*n*b->bs2;
1357   MatScalar      *vsingle;
1358 
1359   PetscFunctionBegin;
1360   if (N > b->setvalueslen) {
1361     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1362     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1363     b->setvalueslen  = N;
1364   }
1365   vsingle = b->setvaluescopy;
1366   for (i=0; i<N; i++) {
1367     vsingle[i] = v[i];
1368   }
1369   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 #endif
1373 
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1377 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1378 {
1379   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1380   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1381   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1382   PetscErrorCode  ierr;
1383   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1384   PetscTruth      roworiented=a->roworiented;
1385   const MatScalar *value = v;
1386   MatScalar       *ap,*aa = a->a,*bap;
1387 
1388   PetscFunctionBegin;
1389   if (roworiented) {
1390     stepval = (n-1)*bs;
1391   } else {
1392     stepval = (m-1)*bs;
1393   }
1394   for (k=0; k<m; k++) { /* loop over added rows */
1395     row  = im[k];
1396     if (row < 0) continue;
1397 #if defined(PETSC_USE_DEBUG)
1398     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1399 #endif
1400     rp   = aj + ai[row];
1401     ap   = aa + bs2*ai[row];
1402     rmax = imax[row];
1403     nrow = ailen[row];
1404     low  = 0;
1405     high = nrow;
1406     for (l=0; l<n; l++) { /* loop over added columns */
1407       if (in[l] < 0) continue;
1408 #if defined(PETSC_USE_DEBUG)
1409       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1410 #endif
1411       col = in[l];
1412       if (roworiented) {
1413         value = v + k*(stepval+bs)*bs + l*bs;
1414       } else {
1415         value = v + l*(stepval+bs)*bs + k*bs;
1416       }
1417       if (col < lastcol) low = 0; else high = nrow;
1418       lastcol = col;
1419       while (high-low > 7) {
1420         t = (low+high)/2;
1421         if (rp[t] > col) high = t;
1422         else             low  = t;
1423       }
1424       for (i=low; i<high; i++) {
1425         if (rp[i] > col) break;
1426         if (rp[i] == col) {
1427           bap  = ap +  bs2*i;
1428           if (roworiented) {
1429             if (is == ADD_VALUES) {
1430               for (ii=0; ii<bs; ii++,value+=stepval) {
1431                 for (jj=ii; jj<bs2; jj+=bs) {
1432                   bap[jj] += *value++;
1433                 }
1434               }
1435             } else {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] = *value++;
1439                 }
1440               }
1441             }
1442           } else {
1443             if (is == ADD_VALUES) {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=0; jj<bs; jj++) {
1446                   *bap++ += *value++;
1447                 }
1448               }
1449             } else {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++  = *value++;
1453                 }
1454               }
1455             }
1456           }
1457           goto noinsert2;
1458         }
1459       }
1460       if (nonew == 1) goto noinsert2;
1461       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1462       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1463       N = nrow++ - 1;
1464       /* shift up all the later entries in this row */
1465       for (ii=N; ii>=i; ii--) {
1466         rp[ii+1] = rp[ii];
1467         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1468       }
1469       if (N >= i) {
1470         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1471       }
1472       rp[i] = col;
1473       bap   = ap +  bs2*i;
1474       if (roworiented) {
1475         for (ii=0; ii<bs; ii++,value+=stepval) {
1476           for (jj=ii; jj<bs2; jj+=bs) {
1477             bap[jj] = *value++;
1478           }
1479         }
1480       } else {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=0; jj<bs; jj++) {
1483             *bap++  = *value++;
1484           }
1485         }
1486       }
1487       noinsert2:;
1488       low = i;
1489     }
1490     ailen[row] = nrow;
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1497 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498 {
1499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1500   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1502   PetscErrorCode ierr;
1503   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504   MatScalar      *aa = a->a,*ap;
1505   PetscReal      ratio=0.6;
1506 
1507   PetscFunctionBegin;
1508   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1509 
1510   if (m) rmax = ailen[0];
1511   for (i=1; i<mbs; i++) {
1512     /* move each row back by the amount of empty slots (fshift) before it*/
1513     fshift += imax[i-1] - ailen[i-1];
1514     rmax   = PetscMax(rmax,ailen[i]);
1515     if (fshift) {
1516       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1517       N = ailen[i];
1518       for (j=0; j<N; j++) {
1519         ip[j-fshift] = ip[j];
1520         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1521       }
1522     }
1523     ai[i] = ai[i-1] + ailen[i-1];
1524   }
1525   if (mbs) {
1526     fshift += imax[mbs-1] - ailen[mbs-1];
1527     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1528   }
1529   /* reset ilen and imax for each row */
1530   for (i=0; i<mbs; i++) {
1531     ailen[i] = imax[i] = ai[i+1] - ai[i];
1532   }
1533   a->nz = ai[mbs];
1534 
1535   /* diagonals may have moved, so kill the diagonal pointers */
1536   a->idiagvalid = PETSC_FALSE;
1537   if (fshift && a->diag) {
1538     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1539     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1540     a->diag = 0;
1541   }
1542   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1543   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1544   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1545   a->reallocs          = 0;
1546   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1547 
1548   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1549   if (a->compressedrow.use){
1550     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1551   }
1552 
1553   A->same_nonzero = PETSC_TRUE;
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /*
1558    This function returns an array of flags which indicate the locations of contiguous
1559    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1560    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1561    Assume: sizes should be long enough to hold all the values.
1562 */
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1565 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1566 {
1567   PetscInt   i,j,k,row;
1568   PetscTruth flg;
1569 
1570   PetscFunctionBegin;
1571   for (i=0,j=0; i<n; j++) {
1572     row = idx[i];
1573     if (row%bs!=0) { /* Not the begining of a block */
1574       sizes[j] = 1;
1575       i++;
1576     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1577       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1578       i++;
1579     } else { /* Begining of the block, so check if the complete block exists */
1580       flg = PETSC_TRUE;
1581       for (k=1; k<bs; k++) {
1582         if (row+k != idx[i+k]) { /* break in the block */
1583           flg = PETSC_FALSE;
1584           break;
1585         }
1586       }
1587       if (flg) { /* No break in the bs */
1588         sizes[j] = bs;
1589         i+= bs;
1590       } else {
1591         sizes[j] = 1;
1592         i++;
1593       }
1594     }
1595   }
1596   *bs_max = j;
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1602 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1603 {
1604   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1605   PetscErrorCode ierr;
1606   PetscInt       i,j,k,count,*rows;
1607   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1608   PetscScalar    zero = 0.0;
1609   MatScalar      *aa;
1610 
1611   PetscFunctionBegin;
1612   /* Make a copy of the IS and  sort it */
1613   /* allocate memory for rows,sizes */
1614   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1615   sizes = rows + is_n;
1616 
1617   /* copy IS values to rows, and sort them */
1618   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1619   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1620   if (baij->keepzeroedrows) {
1621     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1622     bs_max = is_n;
1623     A->same_nonzero = PETSC_TRUE;
1624   } else {
1625     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1626     A->same_nonzero = PETSC_FALSE;
1627   }
1628 
1629   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1630     row   = rows[j];
1631     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1632     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1633     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1634     if (sizes[i] == bs && !baij->keepzeroedrows) {
1635       if (diag != 0.0) {
1636         if (baij->ilen[row/bs] > 0) {
1637           baij->ilen[row/bs]       = 1;
1638           baij->j[baij->i[row/bs]] = row/bs;
1639           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1640         }
1641         /* Now insert all the diagonal values for this bs */
1642         for (k=0; k<bs; k++) {
1643           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1644         }
1645       } else { /* (diag == 0.0) */
1646         baij->ilen[row/bs] = 0;
1647       } /* end (diag == 0.0) */
1648     } else { /* (sizes[i] != bs) */
1649 #if defined (PETSC_USE_DEBUG)
1650       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1651 #endif
1652       for (k=0; k<count; k++) {
1653         aa[0] =  zero;
1654         aa    += bs;
1655       }
1656       if (diag != 0.0) {
1657         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1658       }
1659     }
1660   }
1661 
1662   ierr = PetscFree(rows);CHKERRQ(ierr);
1663   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1669 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1670 {
1671   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1672   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1673   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1674   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1675   PetscErrorCode ierr;
1676   PetscInt       ridx,cidx,bs2=a->bs2;
1677   PetscTruth     roworiented=a->roworiented;
1678   MatScalar      *ap,value,*aa=a->a,*bap;
1679 
1680   PetscFunctionBegin;
1681   for (k=0; k<m; k++) { /* loop over added rows */
1682     row  = im[k];
1683     brow = row/bs;
1684     if (row < 0) continue;
1685 #if defined(PETSC_USE_DEBUG)
1686     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1687 #endif
1688     rp   = aj + ai[brow];
1689     ap   = aa + bs2*ai[brow];
1690     rmax = imax[brow];
1691     nrow = ailen[brow];
1692     low  = 0;
1693     high = nrow;
1694     for (l=0; l<n; l++) { /* loop over added columns */
1695       if (in[l] < 0) continue;
1696 #if defined(PETSC_USE_DEBUG)
1697       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1698 #endif
1699       col = in[l]; bcol = col/bs;
1700       ridx = row % bs; cidx = col % bs;
1701       if (roworiented) {
1702         value = v[l + k*n];
1703       } else {
1704         value = v[k + l*m];
1705       }
1706       if (col < lastcol) low = 0; else high = nrow;
1707       lastcol = col;
1708       while (high-low > 7) {
1709         t = (low+high)/2;
1710         if (rp[t] > bcol) high = t;
1711         else              low  = t;
1712       }
1713       for (i=low; i<high; i++) {
1714         if (rp[i] > bcol) break;
1715         if (rp[i] == bcol) {
1716           bap  = ap +  bs2*i + bs*cidx + ridx;
1717           if (is == ADD_VALUES) *bap += value;
1718           else                  *bap  = value;
1719           goto noinsert1;
1720         }
1721       }
1722       if (nonew == 1) goto noinsert1;
1723       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1724       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1725       N = nrow++ - 1;
1726       /* shift up all the later entries in this row */
1727       for (ii=N; ii>=i; ii--) {
1728         rp[ii+1] = rp[ii];
1729         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1730       }
1731       if (N>=i) {
1732         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1733       }
1734       rp[i]                      = bcol;
1735       ap[bs2*i + bs*cidx + ridx] = value;
1736       a->nz++;
1737       noinsert1:;
1738       low = i;
1739     }
1740     ailen[brow] = nrow;
1741   }
1742   A->same_nonzero = PETSC_FALSE;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1749 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1750 {
1751   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1752   Mat            outA;
1753   PetscErrorCode ierr;
1754   PetscTruth     row_identity,col_identity;
1755 
1756   PetscFunctionBegin;
1757   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1758   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1759   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1760   if (!row_identity || !col_identity) {
1761     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1762   }
1763 
1764   outA          = inA;
1765   inA->factor   = FACTOR_LU;
1766 
1767   if (!a->diag) {
1768     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1769   }
1770 
1771   a->row        = row;
1772   a->col        = col;
1773   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1774   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1775 
1776   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1777   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1778   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1779 
1780   /*
1781       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1782       for ILU(0) factorization with natural ordering
1783   */
1784   if (inA->bs < 8) {
1785     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1786   } else {
1787     if (!a->solve_work) {
1788       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1789       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1790     }
1791   }
1792 
1793   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1794 
1795   PetscFunctionReturn(0);
1796 }
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1799 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1800 {
1801   static PetscTruth called = PETSC_FALSE;
1802   MPI_Comm          comm = A->comm;
1803   PetscErrorCode    ierr;
1804 
1805   PetscFunctionBegin;
1806   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1807   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1808   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 EXTERN_C_BEGIN
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1815 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1816 {
1817   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1818   PetscInt    i,nz,nbs;
1819 
1820   PetscFunctionBegin;
1821   nz  = baij->maxnz/baij->bs2;
1822   nbs = baij->nbs;
1823   for (i=0; i<nz; i++) {
1824     baij->j[i] = indices[i];
1825   }
1826   baij->nz = nz;
1827   for (i=0; i<nbs; i++) {
1828     baij->ilen[i] = baij->imax[i];
1829   }
1830 
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1837 /*@
1838     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1839        in the matrix.
1840 
1841   Input Parameters:
1842 +  mat - the SeqBAIJ matrix
1843 -  indices - the column indices
1844 
1845   Level: advanced
1846 
1847   Notes:
1848     This can be called if you have precomputed the nonzero structure of the
1849   matrix and want to provide it to the matrix object to improve the performance
1850   of the MatSetValues() operation.
1851 
1852     You MUST have set the correct numbers of nonzeros per row in the call to
1853   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1854 
1855     MUST be called before any calls to MatSetValues();
1856 
1857 @*/
1858 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1859 {
1860   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1861 
1862   PetscFunctionBegin;
1863   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1864   PetscValidPointer(indices,2);
1865   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1866   if (f) {
1867     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1868   } else {
1869     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1876 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1877 {
1878   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1879   PetscErrorCode ierr;
1880   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1881   PetscReal      atmp;
1882   PetscScalar    *x,zero = 0.0;
1883   MatScalar      *aa;
1884   PetscInt       ncols,brow,krow,kcol;
1885 
1886   PetscFunctionBegin;
1887   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1888   bs   = A->bs;
1889   aa   = a->a;
1890   ai   = a->i;
1891   aj   = a->j;
1892   mbs = a->mbs;
1893 
1894   ierr = VecSet(v,zero);CHKERRQ(ierr);
1895   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1896   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1897   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1898   for (i=0; i<mbs; i++) {
1899     ncols = ai[1] - ai[0]; ai++;
1900     brow  = bs*i;
1901     for (j=0; j<ncols; j++){
1902       /* bcol = bs*(*aj); */
1903       for (kcol=0; kcol<bs; kcol++){
1904         for (krow=0; krow<bs; krow++){
1905           atmp = PetscAbsScalar(*aa); aa++;
1906           row = brow + krow;    /* row index */
1907           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1908           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1909         }
1910       }
1911       aj++;
1912     }
1913   }
1914   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1920 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1921 {
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1931 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1932 {
1933   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1934   PetscFunctionBegin;
1935   *array = a->a;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1941 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1942 {
1943   PetscFunctionBegin;
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #include "petscblaslapack.h"
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1950 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1951 {
1952   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1953   PetscErrorCode ierr;
1954   PetscInt       i,bs=Y->bs,j,bs2;
1955   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1956 
1957   PetscFunctionBegin;
1958   if (str == SAME_NONZERO_PATTERN) {
1959     PetscScalar alpha = a;
1960     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1961   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1962     if (y->xtoy && y->XtoY != X) {
1963       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1964       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1965     }
1966     if (!y->xtoy) { /* get xtoy */
1967       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1968       y->XtoY = X;
1969     }
1970     bs2 = bs*bs;
1971     for (i=0; i<x->nz; i++) {
1972       j = 0;
1973       while (j < bs2){
1974         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1975         j++;
1976       }
1977     }
1978     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
1979   } else {
1980     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1981   }
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 /* -------------------------------------------------------------------*/
1986 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1987        MatGetRow_SeqBAIJ,
1988        MatRestoreRow_SeqBAIJ,
1989        MatMult_SeqBAIJ_N,
1990 /* 4*/ MatMultAdd_SeqBAIJ_N,
1991        MatMultTranspose_SeqBAIJ,
1992        MatMultTransposeAdd_SeqBAIJ,
1993        MatSolve_SeqBAIJ_N,
1994        0,
1995        0,
1996 /*10*/ 0,
1997        MatLUFactor_SeqBAIJ,
1998        0,
1999        0,
2000        MatTranspose_SeqBAIJ,
2001 /*15*/ MatGetInfo_SeqBAIJ,
2002        MatEqual_SeqBAIJ,
2003        MatGetDiagonal_SeqBAIJ,
2004        MatDiagonalScale_SeqBAIJ,
2005        MatNorm_SeqBAIJ,
2006 /*20*/ 0,
2007        MatAssemblyEnd_SeqBAIJ,
2008        0,
2009        MatSetOption_SeqBAIJ,
2010        MatZeroEntries_SeqBAIJ,
2011 /*25*/ MatZeroRows_SeqBAIJ,
2012        MatLUFactorSymbolic_SeqBAIJ,
2013        MatLUFactorNumeric_SeqBAIJ_N,
2014        MatCholeskyFactorSymbolic_SeqBAIJ,
2015        MatCholeskyFactorNumeric_SeqBAIJ_N,
2016 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2017        MatILUFactorSymbolic_SeqBAIJ,
2018        MatICCFactorSymbolic_SeqBAIJ,
2019        MatGetArray_SeqBAIJ,
2020        MatRestoreArray_SeqBAIJ,
2021 /*35*/ MatDuplicate_SeqBAIJ,
2022        0,
2023        0,
2024        MatILUFactor_SeqBAIJ,
2025        0,
2026 /*40*/ MatAXPY_SeqBAIJ,
2027        MatGetSubMatrices_SeqBAIJ,
2028        MatIncreaseOverlap_SeqBAIJ,
2029        MatGetValues_SeqBAIJ,
2030        0,
2031 /*45*/ MatPrintHelp_SeqBAIJ,
2032        MatScale_SeqBAIJ,
2033        0,
2034        0,
2035        0,
2036 /*50*/ 0,
2037        MatGetRowIJ_SeqBAIJ,
2038        MatRestoreRowIJ_SeqBAIJ,
2039        0,
2040        0,
2041 /*55*/ 0,
2042        0,
2043        0,
2044        0,
2045        MatSetValuesBlocked_SeqBAIJ,
2046 /*60*/ MatGetSubMatrix_SeqBAIJ,
2047        MatDestroy_SeqBAIJ,
2048        MatView_SeqBAIJ,
2049        MatGetPetscMaps_Petsc,
2050        0,
2051 /*65*/ 0,
2052        0,
2053        0,
2054        0,
2055        0,
2056 /*70*/ MatGetRowMax_SeqBAIJ,
2057        MatConvert_Basic,
2058        0,
2059        0,
2060        0,
2061 /*75*/ 0,
2062        0,
2063        0,
2064        0,
2065        0,
2066 /*80*/ 0,
2067        0,
2068        0,
2069        0,
2070        MatLoad_SeqBAIJ,
2071 /*85*/ 0,
2072        0,
2073        0,
2074        0,
2075        0,
2076 /*90*/ 0,
2077        0,
2078        0,
2079        0,
2080        0,
2081 /*95*/ 0,
2082        0,
2083        0,
2084        0};
2085 
2086 EXTERN_C_BEGIN
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2089 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2090 {
2091   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2092   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2093   PetscErrorCode ierr;
2094 
2095   PetscFunctionBegin;
2096   if (aij->nonew != 1) {
2097     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2098   }
2099 
2100   /* allocate space for values if not already there */
2101   if (!aij->saved_values) {
2102     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2103   }
2104 
2105   /* copy values over */
2106   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2107   PetscFunctionReturn(0);
2108 }
2109 EXTERN_C_END
2110 
2111 EXTERN_C_BEGIN
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2114 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2115 {
2116   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2117   PetscErrorCode ierr;
2118   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2119 
2120   PetscFunctionBegin;
2121   if (aij->nonew != 1) {
2122     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2123   }
2124   if (!aij->saved_values) {
2125     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2126   }
2127 
2128   /* copy values over */
2129   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 EXTERN_C_END
2133 
2134 EXTERN_C_BEGIN
2135 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2136 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2137 EXTERN_C_END
2138 
2139 EXTERN_C_BEGIN
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2142 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2143 {
2144   Mat_SeqBAIJ    *b;
2145   PetscErrorCode ierr;
2146   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2147   PetscTruth     flg,skipallocation = PETSC_FALSE;
2148 
2149   PetscFunctionBegin;
2150 
2151   if (nz == MAT_SKIP_ALLOCATION) {
2152     skipallocation = PETSC_TRUE;
2153     nz             = 0;
2154   }
2155 
2156   B->preallocated = PETSC_TRUE;
2157   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2158   if (nnz && newbs != bs) {
2159     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2160   }
2161   bs   = newbs;
2162 
2163   mbs  = B->m/bs;
2164   nbs  = B->n/bs;
2165   bs2  = bs*bs;
2166 
2167   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2168     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2169   }
2170 
2171   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2172   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2173   if (nnz) {
2174     for (i=0; i<mbs; i++) {
2175       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2176       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2177     }
2178   }
2179 
2180   b       = (Mat_SeqBAIJ*)B->data;
2181   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2182   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2183   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2184   if (!flg) {
2185     switch (bs) {
2186     case 1:
2187       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2188       B->ops->mult            = MatMult_SeqBAIJ_1;
2189       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2190       break;
2191     case 2:
2192       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2193       B->ops->mult            = MatMult_SeqBAIJ_2;
2194       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2195       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2196       break;
2197     case 3:
2198       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2199       B->ops->mult            = MatMult_SeqBAIJ_3;
2200       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2201       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2202       break;
2203     case 4:
2204       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2205       B->ops->mult            = MatMult_SeqBAIJ_4;
2206       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2207       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2208       break;
2209     case 5:
2210       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2211       B->ops->mult            = MatMult_SeqBAIJ_5;
2212       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2213       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2214       break;
2215     case 6:
2216       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2217       B->ops->mult            = MatMult_SeqBAIJ_6;
2218       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2219       break;
2220     case 7:
2221       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2222       B->ops->mult            = MatMult_SeqBAIJ_7;
2223       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2224       break;
2225     default:
2226       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2227       B->ops->mult            = MatMult_SeqBAIJ_N;
2228       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2229       break;
2230     }
2231   }
2232   B->bs      = bs;
2233   b->mbs     = mbs;
2234   b->nbs     = nbs;
2235   if (!skipallocation) {
2236     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2237     /* b->ilen will count nonzeros in each block row so far. */
2238     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2239     if (!nnz) {
2240       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2241       else if (nz <= 0)        nz = 1;
2242       for (i=0; i<mbs; i++) b->imax[i] = nz;
2243       nz = nz*mbs;
2244     } else {
2245       nz = 0;
2246       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2247     }
2248 
2249     /* allocate the matrix space */
2250     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2251     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2252     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2253     b->singlemalloc = PETSC_TRUE;
2254 
2255     b->i[0] = 0;
2256     for (i=1; i<mbs+1; i++) {
2257       b->i[i] = b->i[i-1] + b->imax[i-1];
2258     }
2259   }
2260 
2261   B->bs               = bs;
2262   b->bs2              = bs2;
2263   b->mbs              = mbs;
2264   b->nz               = 0;
2265   b->maxnz            = nz*bs2;
2266   B->info.nz_unneeded = (PetscReal)b->maxnz;
2267   PetscFunctionReturn(0);
2268 }
2269 EXTERN_C_END
2270 
2271 /*MC
2272    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2273    block sparse compressed row format.
2274 
2275    Options Database Keys:
2276 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2277 
2278   Level: beginner
2279 
2280 .seealso: MatCreateSeqBAIJ
2281 M*/
2282 
2283 EXTERN_C_BEGIN
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatCreate_SeqBAIJ"
2286 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2287 {
2288   PetscErrorCode ierr;
2289   PetscMPIInt    size;
2290   Mat_SeqBAIJ    *b;
2291 
2292   PetscFunctionBegin;
2293   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2294   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2295 
2296   B->m = B->M = PetscMax(B->m,B->M);
2297   B->n = B->N = PetscMax(B->n,B->N);
2298   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2299   B->data = (void*)b;
2300   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2301   B->factor           = 0;
2302   B->mapping          = 0;
2303   b->row              = 0;
2304   b->col              = 0;
2305   b->icol             = 0;
2306   b->reallocs         = 0;
2307   b->saved_values     = 0;
2308 #if defined(PETSC_USE_MAT_SINGLE)
2309   b->setvalueslen     = 0;
2310   b->setvaluescopy    = PETSC_NULL;
2311 #endif
2312 
2313   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2314   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2315 
2316   b->sorted           = PETSC_FALSE;
2317   b->roworiented      = PETSC_TRUE;
2318   b->nonew            = 0;
2319   b->diag             = 0;
2320   b->solve_work       = 0;
2321   b->mult_work        = 0;
2322   B->spptr            = 0;
2323   B->info.nz_unneeded = (PetscReal)b->maxnz;
2324   b->keepzeroedrows   = PETSC_FALSE;
2325   b->xtoy              = 0;
2326   b->XtoY              = 0;
2327   b->compressedrow.use     = PETSC_FALSE;
2328   b->compressedrow.nrows   = 0;
2329   b->compressedrow.i       = PETSC_NULL;
2330   b->compressedrow.rindex  = PETSC_NULL;
2331   b->compressedrow.checked = PETSC_FALSE;
2332   B->same_nonzero          = PETSC_FALSE;
2333 
2334   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2335                                      "MatInvertBlockDiagonal_SeqBAIJ",
2336                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2337   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2338                                      "MatStoreValues_SeqBAIJ",
2339                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2340   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2341                                      "MatRetrieveValues_SeqBAIJ",
2342                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2343   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2344                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2345                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2346   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2347                                      "MatConvert_SeqBAIJ_SeqAIJ",
2348                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2350                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2351                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2353                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2354                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 EXTERN_C_END
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2361 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2362 {
2363   Mat            C;
2364   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2365   PetscErrorCode ierr;
2366   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2367 
2368   PetscFunctionBegin;
2369   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2370 
2371   *B = 0;
2372   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2373   ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr);
2374   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2375   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2376   c    = (Mat_SeqBAIJ*)C->data;
2377 
2378   C->M   = A->M;
2379   C->N   = A->N;
2380   C->bs  = A->bs;
2381   c->bs2 = a->bs2;
2382   c->mbs = a->mbs;
2383   c->nbs = a->nbs;
2384 
2385   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2386   for (i=0; i<mbs; i++) {
2387     c->imax[i] = a->imax[i];
2388     c->ilen[i] = a->ilen[i];
2389   }
2390 
2391   /* allocate the matrix space */
2392   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2393   c->singlemalloc = PETSC_TRUE;
2394   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2395   if (mbs > 0) {
2396     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2397     if (cpvalues == MAT_COPY_VALUES) {
2398       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2399     } else {
2400       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2401     }
2402   }
2403   c->sorted      = a->sorted;
2404   c->roworiented = a->roworiented;
2405   c->nonew       = a->nonew;
2406 
2407   if (a->diag) {
2408     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2409     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2410     for (i=0; i<mbs; i++) {
2411       c->diag[i] = a->diag[i];
2412     }
2413   } else c->diag        = 0;
2414   c->nz                 = a->nz;
2415   c->maxnz              = a->maxnz;
2416   c->solve_work         = 0;
2417   c->mult_work          = 0;
2418   C->preallocated       = PETSC_TRUE;
2419   C->assembled          = PETSC_TRUE;
2420 
2421   c->compressedrow.use     = a->compressedrow.use;
2422   c->compressedrow.nrows   = a->compressedrow.nrows;
2423   c->compressedrow.checked = a->compressedrow.checked;
2424   if ( a->compressedrow.checked && a->compressedrow.use){
2425     i = a->compressedrow.nrows;
2426     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2427     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2428     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2429     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2430   } else {
2431     c->compressedrow.use    = PETSC_FALSE;
2432     c->compressedrow.i      = PETSC_NULL;
2433     c->compressedrow.rindex = PETSC_NULL;
2434   }
2435   C->same_nonzero = A->same_nonzero;
2436   *B = C;
2437   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatLoad_SeqBAIJ"
2443 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2444 {
2445   Mat_SeqBAIJ    *a;
2446   Mat            B;
2447   PetscErrorCode ierr;
2448   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2449   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2450   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2451   PetscInt       *masked,nmask,tmp,bs2,ishift;
2452   PetscMPIInt    size;
2453   int            fd;
2454   PetscScalar    *aa;
2455   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2456 
2457   PetscFunctionBegin;
2458   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2459   bs2  = bs*bs;
2460 
2461   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2462   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2463   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2464   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2465   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2466   M = header[1]; N = header[2]; nz = header[3];
2467 
2468   if (header[3] < 0) {
2469     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2470   }
2471 
2472   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2473 
2474   /*
2475      This code adds extra rows to make sure the number of rows is
2476     divisible by the blocksize
2477   */
2478   mbs        = M/bs;
2479   extra_rows = bs - M + bs*(mbs);
2480   if (extra_rows == bs) extra_rows = 0;
2481   else                  mbs++;
2482   if (extra_rows) {
2483     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2484   }
2485 
2486   /* read in row lengths */
2487   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2488   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2489   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2490 
2491   /* read in column indices */
2492   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2493   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2494   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2495 
2496   /* loop over row lengths determining block row lengths */
2497   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2498   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2499   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2500   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2501   masked   = mask + mbs;
2502   rowcount = 0; nzcount = 0;
2503   for (i=0; i<mbs; i++) {
2504     nmask = 0;
2505     for (j=0; j<bs; j++) {
2506       kmax = rowlengths[rowcount];
2507       for (k=0; k<kmax; k++) {
2508         tmp = jj[nzcount++]/bs;
2509         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2510       }
2511       rowcount++;
2512     }
2513     browlengths[i] += nmask;
2514     /* zero out the mask elements we set */
2515     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2516   }
2517 
2518   /* create our matrix */
2519   ierr = MatCreate(comm,&B);
2520   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2521   ierr = MatSetType(B,type);CHKERRQ(ierr);
2522   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2523   a = (Mat_SeqBAIJ*)B->data;
2524 
2525   /* set matrix "i" values */
2526   a->i[0] = 0;
2527   for (i=1; i<= mbs; i++) {
2528     a->i[i]      = a->i[i-1] + browlengths[i-1];
2529     a->ilen[i-1] = browlengths[i-1];
2530   }
2531   a->nz         = 0;
2532   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2533 
2534   /* read in nonzero values */
2535   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2536   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2537   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2538 
2539   /* set "a" and "j" values into matrix */
2540   nzcount = 0; jcount = 0;
2541   for (i=0; i<mbs; i++) {
2542     nzcountb = nzcount;
2543     nmask    = 0;
2544     for (j=0; j<bs; j++) {
2545       kmax = rowlengths[i*bs+j];
2546       for (k=0; k<kmax; k++) {
2547         tmp = jj[nzcount++]/bs;
2548 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2549       }
2550     }
2551     /* sort the masked values */
2552     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2553 
2554     /* set "j" values into matrix */
2555     maskcount = 1;
2556     for (j=0; j<nmask; j++) {
2557       a->j[jcount++]  = masked[j];
2558       mask[masked[j]] = maskcount++;
2559     }
2560     /* set "a" values into matrix */
2561     ishift = bs2*a->i[i];
2562     for (j=0; j<bs; j++) {
2563       kmax = rowlengths[i*bs+j];
2564       for (k=0; k<kmax; k++) {
2565         tmp       = jj[nzcountb]/bs ;
2566         block     = mask[tmp] - 1;
2567         point     = jj[nzcountb] - bs*tmp;
2568         idx       = ishift + bs2*block + j + bs*point;
2569         a->a[idx] = (MatScalar)aa[nzcountb++];
2570       }
2571     }
2572     /* zero out the mask elements we set */
2573     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2574   }
2575   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2576 
2577   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2578   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2579   ierr = PetscFree(aa);CHKERRQ(ierr);
2580   ierr = PetscFree(jj);CHKERRQ(ierr);
2581   ierr = PetscFree(mask);CHKERRQ(ierr);
2582 
2583   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2584   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2585   ierr = MatView_Private(B);CHKERRQ(ierr);
2586 
2587   *A = B;
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatCreateSeqBAIJ"
2593 /*@C
2594    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2595    compressed row) format.  For good matrix assembly performance the
2596    user should preallocate the matrix storage by setting the parameter nz
2597    (or the array nnz).  By setting these parameters accurately, performance
2598    during matrix assembly can be increased by more than a factor of 50.
2599 
2600    Collective on MPI_Comm
2601 
2602    Input Parameters:
2603 +  comm - MPI communicator, set to PETSC_COMM_SELF
2604 .  bs - size of block
2605 .  m - number of rows
2606 .  n - number of columns
2607 .  nz - number of nonzero blocks  per block row (same for all rows)
2608 -  nnz - array containing the number of nonzero blocks in the various block rows
2609          (possibly different for each block row) or PETSC_NULL
2610 
2611    Output Parameter:
2612 .  A - the matrix
2613 
2614    Options Database Keys:
2615 .   -mat_no_unroll - uses code that does not unroll the loops in the
2616                      block calculations (much slower)
2617 .    -mat_block_size - size of the blocks to use
2618 
2619    Level: intermediate
2620 
2621    Notes:
2622    The number of rows and columns must be divisible by blocksize.
2623 
2624    If the nnz parameter is given then the nz parameter is ignored
2625 
2626    A nonzero block is any block that as 1 or more nonzeros in it
2627 
2628    The block AIJ format is fully compatible with standard Fortran 77
2629    storage.  That is, the stored row and column indices can begin at
2630    either one (as in Fortran) or zero.  See the users' manual for details.
2631 
2632    Specify the preallocated storage with either nz or nnz (not both).
2633    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2634    allocation.  For additional details, see the users manual chapter on
2635    matrices.
2636 
2637 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2638 @*/
2639 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2640 {
2641   PetscErrorCode ierr;
2642 
2643   PetscFunctionBegin;
2644   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2645   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2646   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2647   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2653 /*@C
2654    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2655    per row in the matrix. For good matrix assembly performance the
2656    user should preallocate the matrix storage by setting the parameter nz
2657    (or the array nnz).  By setting these parameters accurately, performance
2658    during matrix assembly can be increased by more than a factor of 50.
2659 
2660    Collective on MPI_Comm
2661 
2662    Input Parameters:
2663 +  A - the matrix
2664 .  bs - size of block
2665 .  nz - number of block nonzeros per block row (same for all rows)
2666 -  nnz - array containing the number of block nonzeros in the various block rows
2667          (possibly different for each block row) or PETSC_NULL
2668 
2669    Options Database Keys:
2670 .   -mat_no_unroll - uses code that does not unroll the loops in the
2671                      block calculations (much slower)
2672 .    -mat_block_size - size of the blocks to use
2673 
2674    Level: intermediate
2675 
2676    Notes:
2677    If the nnz parameter is given then the nz parameter is ignored
2678 
2679    The block AIJ format is fully compatible with standard Fortran 77
2680    storage.  That is, the stored row and column indices can begin at
2681    either one (as in Fortran) or zero.  See the users' manual for details.
2682 
2683    Specify the preallocated storage with either nz or nnz (not both).
2684    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2685    allocation.  For additional details, see the users manual chapter on
2686    matrices.
2687 
2688 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2689 @*/
2690 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2691 {
2692   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2693 
2694   PetscFunctionBegin;
2695   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2696   if (f) {
2697     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2698   }
2699   PetscFunctionReturn(0);
2700 }
2701 
2702